Skip to content

Commit

Permalink
gRPC expander: allow realistic server responses, and log errors
Browse files Browse the repository at this point in the history
Expander requests' payloads can be rather heavy under upscale pressure,
as they're compounding all candidates options and unschedulable pods
that could fit each options. Expander responses are a subset of the
requests' payload items.

We're allowing ourself to send arbitrary payload sizes (gRPC
`defaultClientMaxSendMessageSize` is `math.MaxInt32`), but we're prone
to drop expander servers responses to the floor, due to the `4MiB`
`defaultClientMaxReceiveMessageSize`.

The arbitrary 128MiB value is meant to be huge (enough to support eg.
several dozen fat 1MiB pods) but not unlimited. Let me know if you'd
rather see that turned to be a command line flag, or an other value.

Also logging the possible gRPC call errors, as that of great help to
diagnose that kind of issues.
  • Loading branch information
bpineau authored and Panic Stevenson committed Aug 6, 2024
1 parent d50d925 commit 25737d6
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions cluster-autoscaler/expander/grpcplugin/grpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ import (
"google.golang.org/grpc/credentials"
)

const gRPCTimeout = 5 * time.Second
const (
gRPCTimeout = 5 * time.Second
gRPCMaxRecvMsgSize = 128 << 20
)

type grpcclientstrategy struct {
grpcClient protos.ExpanderClient
Expand All @@ -47,8 +50,6 @@ func NewFilter(expanderCert string, expanderUrl string) expander.Filter {
}

func createGRPCClient(expanderCert string, expanderUrl string) protos.ExpanderClient {
var dialOpt grpc.DialOption

if expanderCert == "" {
log.Fatalf("GRPC Expander Cert not specified, insecure connections not allowed")
return nil
Expand All @@ -58,9 +59,12 @@ func createGRPCClient(expanderCert string, expanderUrl string) protos.ExpanderCl
log.Fatalf("Failed to create TLS credentials %v", err)
return nil
}
dialOpt = grpc.WithTransportCredentials(creds)
klog.V(2).Infof("Dialing: %s with dialopt: %v", expanderUrl, dialOpt)
conn, err := grpc.Dial(expanderUrl, dialOpt)
dialOpts := []grpc.DialOption{
grpc.WithTransportCredentials(creds),
grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(gRPCMaxRecvMsgSize)),
}
klog.V(2).Infof("Dialing: %s with dialopt: %v", expanderUrl, dialOpts)
conn, err := grpc.Dial(expanderUrl, dialOpts...)
if err != nil {
log.Fatalf("Fail to dial server: %v", err)
return nil
Expand All @@ -84,7 +88,7 @@ func (g *grpcclientstrategy) BestOptions(expansionOptions []expander.Option, nod
defer cancel()
bestOptionsResponse, err := g.grpcClient.BestOptions(ctx, &protos.BestOptionsRequest{Options: grpcOptionsSlice, NodeMap: grpcNodeMap})
if err != nil {
klog.V(4).Info("GRPC call timed out, no options filtered")
klog.V(4).Infof("GRPC call failed, no options filtered: %v", err)
return expansionOptions
}

Expand Down

0 comments on commit 25737d6

Please sign in to comment.