Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] allow server cert rotation without a restart #1672

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ We use *breaking* word for marking changes that are not backward compatible (rel
- [#1758](https://github.com/thanos-io/thanos/pull/1758) Bucket: `thanos bucket web` now supports `--web.external-prefix` for proxying on a subpath.
- [#1770](https://github.com/thanos-io/thanos/pull/1770) Bucket: Add `--web.prefix-header` flags to allow for bucket UI to be accessible behind a reverse proxy.
- [#1668](https://github.com/thanos-io/thanos/pull/1668) Receiver: Added TLS options for both server and client remote write.
- [#1672](https://github.com/thanos-io/thanos/pull/1672) All components: Client and server certificates are automatically reloaded when they change on disk.
- [#1672](https://github.com/thanos-io/thanos/pull/1672) Added a new `--grpc-server-max-connection-age` CLI option which controls how often the TLS handshake is redone and the certificates are re-read. In practice it sets the gRPC server's keepalive maximum connection age: once a connection is closed, reconnecting requires a new TLS handshake.

### Fixed

Expand Down
6 changes: 5 additions & 1 deletion cmd/thanos/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"fmt"
"strings"
"time"

"github.com/thanos-io/thanos/pkg/extflag"

Expand All @@ -23,6 +24,7 @@ func regGRPCFlags(cmd *kingpin.CmdClause) (
grpcTLSSrvCert *string,
grpcTLSSrvKey *string,
grpcTLSSrvClientCA *string,
grpcMaxConnectionAge *time.Duration,
) {
grpcBindAddr = cmd.Flag("grpc-address", "Listen ip:port address for gRPC endpoints (StoreAPI). Make sure this address is routable from other components.").
Default("0.0.0.0:10901").String()
Expand All @@ -31,12 +33,14 @@ func regGRPCFlags(cmd *kingpin.CmdClause) (
grpcTLSSrvCert = cmd.Flag("grpc-server-tls-cert", "TLS Certificate for gRPC server, leave blank to disable TLS").Default("").String()
grpcTLSSrvKey = cmd.Flag("grpc-server-tls-key", "TLS Key for the gRPC server, leave blank to disable TLS").Default("").String()
grpcTLSSrvClientCA = cmd.Flag("grpc-server-tls-client-ca", "TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert)").Default("").String()
grpcMaxConnectionAge = cmd.Flag("grpc-server-max-connection-age", "The grpc server max connection age. This controls how often to re-read the tls certificates and redo the TLS handshake ").Default("1m").Duration()

return grpcBindAddr,
grpcGracePeriod,
grpcTLSSrvCert,
grpcTLSSrvKey,
grpcTLSSrvClientCA
grpcTLSSrvClientCA,
grpcMaxConnectionAge
}

func regHTTPFlags(cmd *kingpin.CmdClause) (httpBindAddr *string, httpGracePeriod *model.Duration) {
Expand Down
5 changes: 4 additions & 1 deletion cmd/thanos/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application) {
cmd := app.Command(comp.String(), "query node exposing PromQL enabled Query API with data retrieved from multiple store nodes")

httpBindAddr, httpGracePeriod := regHTTPFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd)

secure := cmd.Flag("grpc-client-tls-secure", "Use TLS when talking to the gRPC server").Default("false").Bool()
cert := cmd.Flag("grpc-client-tls-cert", "TLS Certificates to use to identify this client to the server").Default("").String()
Expand Down Expand Up @@ -137,6 +137,7 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application) {
*grpcCert,
*grpcKey,
*grpcClientCA,
*grpcMaxConnAge,
*secure,
*cert,
*key,
Expand Down Expand Up @@ -219,6 +220,7 @@ func runQuery(
grpcCert string,
grpcKey string,
grpcClientCA string,
grpcMaxConnAge time.Duration,
secure bool,
cert string,
key string,
Expand Down Expand Up @@ -408,6 +410,7 @@ func runQuery(
grpcserver.WithListen(grpcBindAddr),
grpcserver.WithGracePeriod(grpcGracePeriod),
grpcserver.WithTLSConfig(tlsCfg),
grpcserver.WithMaxConnAge(grpcMaxConnAge),
)

g.Add(func() error {
Expand Down
5 changes: 4 additions & 1 deletion cmd/thanos/receive.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application) {
cmd := app.Command(comp.String(), "Accept Prometheus remote write API requests and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)")

httpBindAddr, httpGracePeriod := regHTTPFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd)

rwAddress := cmd.Flag("remote-write.address", "Address to listen on for remote write requests.").
Default("0.0.0.0:19291").String()
Expand Down Expand Up @@ -109,6 +109,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application) {
*grpcCert,
*grpcKey,
*grpcClientCA,
*grpcMaxConnAge,
*httpBindAddr,
time.Duration(*httpGracePeriod),
*rwAddress,
Expand Down Expand Up @@ -144,6 +145,7 @@ func runReceive(
grpcCert string,
grpcKey string,
grpcClientCA string,
grpcMaxConnAge time.Duration,
httpBindAddr string,
httpGracePeriod time.Duration,
rwAddress string,
Expand Down Expand Up @@ -370,6 +372,7 @@ func runReceive(
grpcserver.WithListen(grpcBindAddr),
grpcserver.WithGracePeriod(grpcGracePeriod),
grpcserver.WithTLSConfig(tlsCfg),
grpcserver.WithMaxConnAge(grpcMaxConnAge),
)
startGRPC <- struct{}{}
}
Expand Down
5 changes: 4 additions & 1 deletion cmd/thanos/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application) {
cmd := app.Command(comp.String(), "ruler evaluating Prometheus rules against given Query nodes, exposing Store API and storing old blocks in bucket")

httpBindAddr, httpGracePeriod := regHTTPFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd)

labelStrs := cmd.Flag("label", "Labels to be applied to all generated metrics (repeated). Similar to external labels for Prometheus, used to identify ruler and its blocks as unique source.").
PlaceHolder("<name>=\"<value>\"").Strings()
Expand Down Expand Up @@ -162,6 +162,7 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application) {
*grpcCert,
*grpcKey,
*grpcClientCA,
*grpcMaxConnAge,
*httpBindAddr,
time.Duration(*httpGracePeriod),
*webRoutePrefix,
Expand Down Expand Up @@ -199,6 +200,7 @@ func runRule(
grpcCert string,
grpcKey string,
grpcClientCA string,
grpcMaxConnAge time.Duration,
httpBindAddr string,
httpGracePeriod time.Duration,
webRoutePrefix string,
Expand Down Expand Up @@ -514,6 +516,7 @@ func runRule(
grpcserver.WithListen(grpcBindAddr),
grpcserver.WithGracePeriod(grpcGracePeriod),
grpcserver.WithTLSConfig(tlsCfg),
grpcserver.WithMaxConnAge(grpcMaxConnAge),
)

g.Add(func() error {
Expand Down
5 changes: 4 additions & 1 deletion cmd/thanos/sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application) {
cmd := app.Command(component.Sidecar.String(), "sidecar for Prometheus server")

httpBindAddr, httpGracePeriod := regHTTPFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd)

promURL := cmd.Flag("prometheus.url", "URL at which to reach Prometheus's API. For better performance use local network.").
Default("http://localhost:9090").URL()
Expand Down Expand Up @@ -82,6 +82,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application) {
*grpcCert,
*grpcKey,
*grpcClientCA,
*grpcMaxConnAge,
*httpBindAddr,
time.Duration(*httpGracePeriod),
*promURL,
Expand All @@ -106,6 +107,7 @@ func runSidecar(
grpcCert string,
grpcKey string,
grpcClientCA string,
grpcMaxConnAge time.Duration,
httpBindAddr string,
httpGracePeriod time.Duration,
promURL *url.URL,
Expand Down Expand Up @@ -254,6 +256,7 @@ func runSidecar(
grpcserver.WithListen(grpcBindAddr),
grpcserver.WithGracePeriod(grpcGracePeriod),
grpcserver.WithTLSConfig(tlsCfg),
grpcserver.WithMaxConnAge(grpcMaxConnAge),
)
g.Add(func() error {
statusProber.Ready()
Expand Down
5 changes: 4 additions & 1 deletion cmd/thanos/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application) {
cmd := app.Command(component.Store.String(), "store node giving access to blocks in a bucket provider. Now supported GCS, S3, Azure, Swift and Tencent COS.")

httpBindAddr, httpGracePeriod := regHTTPFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd)
grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd)

dataDir := cmd.Flag("data-dir", "Data directory in which to cache remote blocks.").
Default("./data").String()
Expand Down Expand Up @@ -84,6 +84,7 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application) {
*grpcCert,
*grpcKey,
*grpcClientCA,
*grpcMaxConnAge,
*httpBindAddr,
time.Duration(*httpGracePeriod),
uint64(*indexCacheSize),
Expand Down Expand Up @@ -117,6 +118,7 @@ func runStore(
grpcCert string,
grpcKey string,
grpcClientCA string,
grpcMaxConnAge time.Duration,
httpBindAddr string,
httpGracePeriod time.Duration,
indexCacheSizeBytes uint64,
Expand Down Expand Up @@ -246,6 +248,7 @@ func runStore(
grpcserver.WithListen(grpcBindAddr),
grpcserver.WithGracePeriod(grpcGracePeriod),
grpcserver.WithTLSConfig(tlsCfg),
grpcserver.WithMaxConnAge(grpcMaxConnAge),
)

g.Add(func() error {
Expand Down
4 changes: 4 additions & 0 deletions docs/components/query.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,10 @@ Flags:
TLS CA to verify clients against. If no client
CA is specified, there is no client
verification on server side. (tls.NoClientCert)
--grpc-server-max-connection-age=1m
The grpc server max connection age. This
controls how often to re-read the tls
certificates and redo the TLS handshake
--grpc-client-tls-secure Use TLS when talking to the gRPC server
--grpc-client-tls-cert="" TLS Certificates to use to identify this client
to the server
Expand Down
4 changes: 4 additions & 0 deletions docs/components/rule.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ Flags:
TLS CA to verify clients against. If no client
CA is specified, there is no client
verification on server side. (tls.NoClientCert)
--grpc-server-max-connection-age=1m
The grpc server max connection age. This
controls how often to re-read the tls
certificates and redo the TLS handshake
--label=<name>="<value>" ...
Labels to be applied to all generated metrics
(repeated). Similar to external labels for
Expand Down
4 changes: 4 additions & 0 deletions docs/components/sidecar.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ Flags:
TLS CA to verify clients against. If no client
CA is specified, there is no client
verification on server side. (tls.NoClientCert)
--grpc-server-max-connection-age=1m
The grpc server max connection age. This
controls how often to re-read the tls
certificates and redo the TLS handshake
--prometheus.url=http://localhost:9090
URL at which to reach Prometheus's API. For
better performance use local network.
Expand Down
4 changes: 4 additions & 0 deletions docs/components/store.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ Flags:
TLS CA to verify clients against. If no client
CA is specified, there is no client
verification on server side. (tls.NoClientCert)
--grpc-server-max-connection-age=1m
The grpc server max connection age. This
controls how often to re-read the tls
certificates and redo the TLS handshake
--data-dir="./data" Data directory in which to cache remote blocks.
--index-cache-size=250MB Maximum size of items held in the index cache.
--chunk-pool-size=2GB Maximum size of concurrently allocatable bytes
Expand Down
4 changes: 4 additions & 0 deletions pkg/server/grpc/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/status"
)

Expand Down Expand Up @@ -74,6 +75,9 @@ func New(logger log.Logger, reg prometheus.Registerer, tracer opentracing.Tracer
if options.tlsConfig != nil {
grpcOpts = append(grpcOpts, grpc.Creds(credentials.NewTLS(options.tlsConfig)))
}
if options.maxConnAge > 0 {
grpcOpts = append(grpcOpts, grpc.KeepaliveParams(keepalive.ServerParameters{MaxConnectionAge: options.maxConnAge}))
}
s := grpc.NewServer(grpcOpts...)

storepb.RegisterStoreServer(s, storeSrv)
Expand Down
8 changes: 8 additions & 0 deletions pkg/server/grpc/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

type options struct {
gracePeriod time.Duration
maxConnAge time.Duration
listen string

tlsConfig *tls.Config
Expand Down Expand Up @@ -45,3 +46,10 @@ func WithTLSConfig(cfg *tls.Config) Option {
o.tlsConfig = cfg
})
}

// WithMaxConnAge returns an Option that records t as the maximum
// connection age in the gRPC server options.
func WithMaxConnAge(t time.Duration) Option {
	applyMaxConnAge := func(opts *options) {
		opts.maxConnAge = t
	}
	return optionFunc(applyMaxConnAge)
}
104 changes: 0 additions & 104 deletions pkg/tls/options.go

This file was deleted.

Loading