From af7d97076c3bcce322288dac353eed361468c589 Mon Sep 17 00:00:00 2001 From: Jake <86763948+notfromstatefarm@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:02:10 -0400 Subject: [PATCH] fix: CMP manifest generation fails with ENHANCE_YOUR_CALM if over 40s (#9922) * fix: CMP manifest generation fails with ENHANCE_YOUR_CALM if over 40s Signed-off-by: notfromstatefarm <86763948+notfromstatefarm@users.noreply.github.com> * fix timeouts across all gRPC servers Signed-off-by: notfromstatefarm <86763948+notfromstatefarm@users.noreply.github.com> * use common consts Signed-off-by: notfromstatefarm <86763948+notfromstatefarm@users.noreply.github.com> --- cmpserver/server.go | 9 ++++++++- common/common.go | 7 +++++++ reposerver/server.go | 9 ++++++++- server/server.go | 7 +++++++ util/grpc/grpc.go | 4 +++- 5 files changed, 33 insertions(+), 3 deletions(-) diff --git a/cmpserver/server.go b/cmpserver/server.go index 636dbcd1f7339..f66da10605695 100644 --- a/cmpserver/server.go +++ b/cmpserver/server.go @@ -2,12 +2,13 @@ package cmpserver import ( "fmt" - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "net" "os" "os/signal" "syscall" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" grpc_logrus "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus" grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" @@ -24,6 +25,7 @@ import ( "github.com/argoproj/argo-cd/v2/server/version" "github.com/argoproj/argo-cd/v2/util/errors" grpc_util "github.com/argoproj/argo-cd/v2/util/grpc" + "google.golang.org/grpc/keepalive" ) // ArgoCDCMPServer is the config management plugin server implementation @@ -61,6 +63,11 @@ func NewServer(initConstants plugin.CMPServerInitConstants) (*ArgoCDCMPServer, e grpc.StreamInterceptor(grpc_middleware.ChainStreamServer(streamInterceptors...)), grpc.MaxRecvMsgSize(apiclient.MaxGRPCMessageSize), grpc.MaxSendMsgSize(apiclient.MaxGRPCMessageSize), + grpc.KeepaliveEnforcementPolicy( + keepalive.EnforcementPolicy{ + MinTime: common.GRPCKeepAliveEnforcementMinimum, + }, + ), } return &ArgoCDCMPServer{ diff --git a/common/common.go b/common/common.go index 4d92f1fb7193a..ac5c4768903fc 100644 --- a/common/common.go +++ b/common/common.go @@ -286,3 +286,10 @@ const ( // AnnotationApplicationRefresh is an annotation that is added when an ApplicationSet is requested to be refreshed by a webhook. The ApplicationSet controller will remove this annotation at the end of reconcilation. AnnotationApplicationSetRefresh = "argocd.argoproj.io/application-set-refresh" ) + +// gRPC settings +const ( + GRPCKeepAliveEnforcementMinimum = 10 * time.Second + // Keep alive is 2x enforcement minimum to ensure network jitter does not introduce ENHANCE_YOUR_CALM errors + GRPCKeepAliveTime = 2 * GRPCKeepAliveEnforcementMinimum +) diff --git a/reposerver/server.go b/reposerver/server.go index a6f5e3b04bdc2..b7cf4f9537a5f 100644 --- a/reposerver/server.go +++ b/reposerver/server.go @@ -3,10 +3,11 @@ package reposerver import ( "crypto/tls" "fmt" - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "os" "path/filepath" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" grpc_logrus "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus" grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" @@ -15,6 +16,7 @@ import ( "google.golang.org/grpc/credentials" "google.golang.org/grpc/health" "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/keepalive" "google.golang.org/grpc/reflection" "github.com/argoproj/argo-cd/v2/common" @@ -86,6 +88,11 @@ func NewServer(metricsServer *metrics.MetricsServer, cache *reposervercache.Cach grpc.StreamInterceptor(grpc_middleware.ChainStreamServer(streamInterceptors...)), grpc.MaxRecvMsgSize(apiclient.MaxGRPCMessageSize), grpc.MaxSendMsgSize(apiclient.MaxGRPCMessageSize), + grpc.KeepaliveEnforcementPolicy( + keepalive.EnforcementPolicy{ + MinTime: common.GRPCKeepAliveEnforcementMinimum, + }, + ), } // We do allow for non-TLS servers to be created, in case of mTLS will be diff --git a/server/server.go b/server/server.go index 048b5b38c29aa..96be89078ddd2 100644 --- a/server/server.go +++ b/server/server.go @@ -25,6 +25,7 @@ import ( golang_proto "github.com/golang/protobuf/proto" netCtx "context" + "github.com/argoproj/pkg/sync" "github.com/go-redis/redis/v8" "github.com/golang-jwt/jwt/v4" @@ -42,6 +43,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/keepalive" "google.golang.org/grpc/metadata" "google.golang.org/grpc/reflection" "google.golang.org/grpc/status" @@ -610,6 +612,11 @@ func (a *ArgoCDServer) newGRPCServer() (*grpc.Server, application.AppResourceTre grpc.MaxRecvMsgSize(apiclient.MaxGRPCMessageSize), grpc.MaxSendMsgSize(apiclient.MaxGRPCMessageSize), grpc.ConnectionTimeout(300 * time.Second), + grpc.KeepaliveEnforcementPolicy( + keepalive.EnforcementPolicy{ + MinTime: common.GRPCKeepAliveEnforcementMinimum, + }, + ), } sensitiveMethods := map[string]bool{ "/cluster.ClusterService/Create": true, diff --git a/util/grpc/grpc.go b/util/grpc/grpc.go index cc7c1abff4c49..db28454af0ac0 100644 --- a/util/grpc/grpc.go +++ b/util/grpc/grpc.go @@ -8,6 +8,8 @@ import ( "time" "context" + + "github.com/argoproj/argo-cd/v2/common" "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -87,7 +89,7 @@ func BlockingDial(ctx context.Context, network, address string, creds credential grpc.FailOnNonTempDialError(true), grpc.WithContextDialer(dialer), grpc.WithTransportCredentials(insecure.NewCredentials()), // we are handling TLS, so tell grpc not to - grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: 10 * time.Second}), + grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: common.GRPCKeepAliveTime}), ) conn, err := grpc.DialContext(ctx, address, opts...) var res interface{}