From e9c6d275116fc9fea993ed2c9096601a45e4aeb0 Mon Sep 17 00:00:00 2001 From: Zach Reyes Date: Wed, 31 May 2023 13:21:49 -0400 Subject: [PATCH 1/6] Fix Outlier Detection Config handling --- test/xds/xds_client_outlier_detection_test.go | 119 +++++- .../balancer/cdsbalancer/cdsbalancer.go | 100 +++--- .../cdsbalancer/cdsbalancer_security_test.go | 14 +- .../balancer/cdsbalancer/cdsbalancer_test.go | 277 +++++--------- .../clusterresolver/clusterresolver.go | 58 ++- .../clusterresolver/clusterresolver_test.go | 21 +- .../balancer/clusterresolver/config.go | 20 +- .../balancer/clusterresolver/config_test.go | 110 +++++- .../balancer/clusterresolver/configbuilder.go | 4 +- .../clusterresolver/configbuilder_test.go | 59 +-- .../clusterresolver/e2e_test/eds_impl_test.go | 12 +- .../balancer/clusterresolver/priority_test.go | 4 +- .../balancer/outlierdetection/balancer.go | 29 +- .../outlierdetection/balancer_test.go | 339 +++++++++++++++--- .../balancer/outlierdetection/config.go | 39 ++ .../xdsresource/tests/unmarshal_cds_test.go | 48 +-- .../xdsclient/xdsresource/type_cds.go | 68 +--- .../xdsclient/xdsresource/unmarshal_cds.go | 195 ++++++---- .../xdsresource/unmarshal_cds_test.go | 125 ++++--- 19 files changed, 1038 insertions(+), 603 deletions(-) diff --git a/test/xds/xds_client_outlier_detection_test.go b/test/xds/xds_client_outlier_detection_test.go index fa08e9be9a3a..67271d3b50c5 100644 --- a/test/xds/xds_client_outlier_detection_test.go +++ b/test/xds/xds_client_outlier_detection_test.go @@ -90,9 +90,9 @@ func (s) TestOutlierDetection_NoopConfig(t *testing.T) { // clientResourcesMultipleBackendsAndOD returns xDS resources which correspond // to multiple upstreams, corresponding different backends listening on // different localhost:port combinations. The resources also configure an -// Outlier Detection Balancer set up with Failure Percentage Algorithm, which -// ejects endpoints based on failure rate. -func clientResourcesMultipleBackendsAndOD(params e2e.ResourceParams, ports []uint32) e2e.UpdateOptions { +// Outlier Detection Balancer configured through the passed in Outlier Detection +// proto. +func clientResourcesMultipleBackendsAndOD(params e2e.ResourceParams, ports []uint32, od *v3clusterpb.OutlierDetection) e2e.UpdateOptions { routeConfigName := "route-" + params.DialTarget clusterName := "cluster-" + params.DialTarget endpointsName := "endpoints-" + params.DialTarget @@ -100,23 +100,14 @@ func clientResourcesMultipleBackendsAndOD(params e2e.ResourceParams, ports []uin NodeID: params.NodeID, Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(params.DialTarget, routeConfigName)}, Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, params.DialTarget, clusterName)}, - Clusters: []*v3clusterpb.Cluster{clusterWithOutlierDetection(clusterName, endpointsName, params.SecLevel)}, + Clusters: []*v3clusterpb.Cluster{clusterWithOutlierDetection(clusterName, endpointsName, params.SecLevel, od)}, Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(endpointsName, params.Host, ports)}, } } -func clusterWithOutlierDetection(clusterName, edsServiceName string, secLevel e2e.SecurityLevel) *v3clusterpb.Cluster { +func clusterWithOutlierDetection(clusterName, edsServiceName string, secLevel e2e.SecurityLevel, od *v3clusterpb.OutlierDetection) *v3clusterpb.Cluster { cluster := e2e.DefaultCluster(clusterName, edsServiceName, secLevel) - cluster.OutlierDetection = &v3clusterpb.OutlierDetection{ - Interval: &durationpb.Duration{Nanos: 50000000}, // .5 seconds - BaseEjectionTime: &durationpb.Duration{Seconds: 30}, - MaxEjectionTime: &durationpb.Duration{Seconds: 300}, - MaxEjectionPercent: &wrapperspb.UInt32Value{Value: 1}, - FailurePercentageThreshold: &wrapperspb.UInt32Value{Value: 50}, - EnforcingFailurePercentage: &wrapperspb.UInt32Value{Value: 100}, - FailurePercentageRequestVolume: &wrapperspb.UInt32Value{Value: 8}, - FailurePercentageMinimumHosts: &wrapperspb.UInt32Value{Value: 3}, - } + cluster.OutlierDetection = od return cluster } @@ -197,7 +188,103 @@ func (s) TestOutlierDetectionWithOutlier(t *testing.T) { NodeID: nodeID, Host: "localhost", SecLevel: e2e.SecurityLevelNone, - }, []uint32{port1, port2, port3}) + }, []uint32{port1, port2, port3}, &v3clusterpb.OutlierDetection{ + Interval: &durationpb.Duration{Nanos: 50000000}, // .5 seconds + BaseEjectionTime: &durationpb.Duration{Seconds: 30}, + MaxEjectionTime: &durationpb.Duration{Seconds: 300}, + MaxEjectionPercent: &wrapperspb.UInt32Value{Value: 1}, + FailurePercentageThreshold: &wrapperspb.UInt32Value{Value: 50}, + EnforcingFailurePercentage: &wrapperspb.UInt32Value{Value: 100}, + FailurePercentageRequestVolume: &wrapperspb.UInt32Value{Value: 8}, + FailurePercentageMinimumHosts: &wrapperspb.UInt32Value{Value: 3}, + }) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + if err := managementServer.Update(ctx, resources); err != nil { + t.Fatal(err) + } + + cc, err := grpc.Dial(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r)) + if err != nil { + t.Fatalf("failed to dial local test server: %v", err) + } + defer cc.Close() + + client := testgrpc.NewTestServiceClient(cc) + + fullAddresses := []resolver.Address{ + {Addr: backend1.Address}, + {Addr: backend2.Address}, + {Addr: backend3.Address}, + } + // At first, due to no statistics on each of the backends, the 3 + // upstreams should all be round robined across. + if err = checkRoundRobinRPCs(ctx, client, fullAddresses); err != nil { + t.Fatalf("error in expected round robin: %v", err) + } + + // The addresses which don't return errors. + okAddresses := []resolver.Address{ + {Addr: backend1.Address}, + {Addr: backend2.Address}, + } + // After calling the three upstreams, one of them constantly error + // and should eventually be ejected for a period of time. This + // period of time should cause the RPC's to be round robined only + // across the two that are healthy. + if err = checkRoundRobinRPCs(ctx, client, okAddresses); err != nil { + t.Fatalf("error in expected round robin: %v", err) + } +} + +// TestOutlierDetectionXDSDefaultOn tests that Outlier Detection is by default +// configured on in the xDS Flow. If the Outlier Detection proto message is +// present with SuccessRateEjection unset, then Outlier Detection should be +// turned on. The test setups and xDS system with xDS resources with Outlier +// Detection present in the CDS update, but with SuccessRateEjection unset, and +// asserts that Outlier Detection is turned on and ejects upstreams. +func (s) TestOutlierDetectionXDSDefaultOn(t *testing.T) { + managementServer, nodeID, _, r, cleanup := e2e.SetupManagementServer(t, e2e.ManagementServerOptions{}) + defer cleanup() + + // Working backend 1. + backend1 := stubserver.StartTestService(t, nil) + port1 := testutils.ParsePort(t, backend1.Address) + defer backend1.Stop() + + // Working backend 2. + backend2 := stubserver.StartTestService(t, nil) + port2 := testutils.ParsePort(t, backend2.Address) + defer backend2.Stop() + + // Backend 3 that will always return an error and eventually ejected. + backend3 := stubserver.StartTestService(t, &stubserver.StubServer{ + EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return nil, errors.New("some error") }, + }) + port3 := testutils.ParsePort(t, backend3.Address) + defer backend3.Stop() + + // Configure CDS resources with Outlier Detection set but + // EnforcingSuccessRate unset. This should cause Outlier Detection to be + // configured with SuccessRateEjection present in configuration, which will + // eventually be populated with it's default values along with the knobs set + // as SuccessRate fields in the proto, and thus Outlier Detection should be + // on and actively eject upstreams. + const serviceName = "my-service-client-side-xds" + resources := clientResourcesMultipleBackendsAndOD(e2e.ResourceParams{ + DialTarget: serviceName, + NodeID: nodeID, + Host: "localhost", + SecLevel: e2e.SecurityLevelNone, + }, []uint32{port1, port2, port3}, &v3clusterpb.OutlierDetection{ + // Need to set knobs to trigger ejection within the test time frame. + Interval: &durationpb.Duration{Nanos: 50000000}, + // EnforcingSuccessRateSet to nil, causes success rate algorithm to be + // turned on. + SuccessRateMinimumHosts: &wrapperspb.UInt32Value{Value: 1}, + SuccessRateRequestVolume: &wrapperspb.UInt32Value{Value: 8}, + SuccessRateStdevFactor: &wrapperspb.UInt32Value{Value: 1}, + }) ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() if err := managementServer.Update(ctx, resources); err != nil { diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index c9a1611c169b..1ffb26b19a8b 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -33,11 +33,9 @@ import ( "google.golang.org/grpc/internal/grpclog" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/pretty" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" "google.golang.org/grpc/xds/internal/balancer/clusterresolver" - "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -75,11 +73,25 @@ type bb struct{} // Build creates a new CDS balancer with the ClientConn. func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { + builder := balancer.Get(clusterresolver.Name) + if builder == nil { + // Shouldn't happen, registered through imported Cluster Resolver, + // defensive programming. + logger.Errorf("%q LB policy is needed but not registered", clusterresolver.Name) + return nil + } + crParser, ok := builder.(balancer.ConfigParser) + if !ok { + // Shouldn't happen, imported Cluster Resolver builder has this method. + logger.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name) + return nil + } b := &cdsBalancer{ bOpts: opts, updateCh: buffer.NewUnbounded(), closed: grpcsync.NewEvent(), done: grpcsync.NewEvent(), + crParser: crParser, xdsHI: xdsinternal.NewHandshakeInfo(nil, nil), } b.logger = prefixLogger((b)) @@ -160,6 +172,7 @@ type cdsBalancer struct { logger *grpclog.PrefixLogger closed *grpcsync.Event done *grpcsync.Event + crParser balancer.ConfigParser // The certificate providers are cached here to that they can be closed when // a new provider is to be created. @@ -271,52 +284,6 @@ func buildProviderFunc(configs map[string]*certprovider.BuildableConfig, instanc return provider, nil } -func outlierDetectionToConfig(od *xdsresource.OutlierDetection) outlierdetection.LBConfig { // Already validated - no need to return error - if od == nil { - // "If the outlier_detection field is not set in the Cluster message, a - // "no-op" outlier_detection config will be generated, with interval set - // to the maximum possible value and all other fields unset." - A50 - return outlierdetection.LBConfig{ - Interval: 1<<63 - 1, - } - } - - // "if the enforcing_success_rate field is set to 0, the config - // success_rate_ejection field will be null and all success_rate_* fields - // will be ignored." - A50 - var sre *outlierdetection.SuccessRateEjection - if od.EnforcingSuccessRate != 0 { - sre = &outlierdetection.SuccessRateEjection{ - StdevFactor: od.SuccessRateStdevFactor, - EnforcementPercentage: od.EnforcingSuccessRate, - MinimumHosts: od.SuccessRateMinimumHosts, - RequestVolume: od.SuccessRateRequestVolume, - } - } - - // "If the enforcing_failure_percent field is set to 0 or null, the config - // failure_percent_ejection field will be null and all failure_percent_* - // fields will be ignored." - A50 - var fpe *outlierdetection.FailurePercentageEjection - if od.EnforcingFailurePercentage != 0 { - fpe = &outlierdetection.FailurePercentageEjection{ - Threshold: od.FailurePercentageThreshold, - EnforcementPercentage: od.EnforcingFailurePercentage, - MinimumHosts: od.FailurePercentageMinimumHosts, - RequestVolume: od.FailurePercentageRequestVolume, - } - } - - return outlierdetection.LBConfig{ - Interval: internalserviceconfig.Duration(od.Interval), - BaseEjectionTime: internalserviceconfig.Duration(od.BaseEjectionTime), - MaxEjectionTime: internalserviceconfig.Duration(od.MaxEjectionTime), - MaxEjectionPercent: od.MaxEjectionPercent, - SuccessRateEjection: sre, - FailurePercentageEjection: fpe, - } -} - // handleWatchUpdate handles a watch update from the xDS Client. Good updates // lead to clientConn updates being invoked on the underlying cluster_resolver balancer. func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { @@ -390,28 +357,43 @@ func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { b.logger.Infof("Unexpected cluster type %v when handling update from cluster handler", cu.ClusterType) } if envconfig.XDSOutlierDetection { - dms[i].OutlierDetection = outlierDetectionToConfig(cu.OutlierDetection) + odJSON := cu.OutlierDetection + // "In the cds LB policy, if the outlier_detection field is not set in + // the Cluster resource, a "no-op" outlier_detection config will be + // generated in the corresponding DiscoveryMechanism config, with all + // fields unset." - A50 + if odJSON == nil { + // This will pick up top level defaults in Cluster Resolver + // ParseConfig, but sre and fpe will be nil still so still a + // "no-op" config. + odJSON = json.RawMessage(`{}`) + } + dms[i].OutlierDetection = odJSON } } + // Prepare Cluster Resolver config, marshal into JSON, and then Parse it to + // get configuration to send downward to Cluster Resolver. lbCfg := &clusterresolver.LBConfig{ DiscoveryMechanisms: dms, + XDSLBPolicy: update.lbPolicy, + } + crLBCfgJSON, err := json.Marshal(lbCfg) + if err != nil { + // Shouldn't happen, since we just prepared struct. + b.logger.Errorf("cds_balancer: error marshalling prepared config: %v", lbCfg) + return } - bc := &internalserviceconfig.BalancerConfig{} - if err := json.Unmarshal(update.lbPolicy, bc); err != nil { - // This will never occur, valid configuration is emitted from the xDS - // Client. Validity is already checked in the xDS Client, however, this - // double validation is present because Unmarshalling and Validating are - // coupled into one json.Unmarshal operation). We will switch this in - // the future to two separate operations. - b.logger.Errorf("Emitted lbPolicy %s from xDS Client is invalid: %v", update.lbPolicy, err) + var sc serviceconfig.LoadBalancingConfig + if sc, err = b.crParser.ParseConfig(crLBCfgJSON); err != nil { + b.logger.Errorf("cds_balancer: cluster_resolver config generated %v is invalid: %v", crLBCfgJSON, err) return } - lbCfg.XDSLBPolicy = bc + ccState := balancer.ClientConnState{ ResolverState: xdsclient.SetClient(resolver.State{}, b.xdsClient), - BalancerConfig: lbCfg, + BalancerConfig: sc, } if err := b.childLB.UpdateClientConnState(ccState); err != nil { b.logger.Errorf("Encountered error when sending config {%+v} to child policy: %v", ccState, err) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go index eb687aa70f76..fcd2e26960c0 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go @@ -253,7 +253,7 @@ func (s) TestSecurityConfigWithoutXDSCreds(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -312,7 +312,7 @@ func (s) TestNoSecurityConfigWithXDSCreds(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -468,7 +468,7 @@ func (s) TestSecurityConfigUpdate_BadToGood(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -502,7 +502,7 @@ func (s) TestGoodSecurityConfig(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -555,7 +555,7 @@ func (s) TestSecurityConfigUpdate_GoodToFallback(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -608,7 +608,7 @@ func (s) TestSecurityConfigUpdate_GoodToBad(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -687,7 +687,7 @@ func (s) TestSecurityConfigUpdate_GoodToGood(t *testing.T) { }, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go index 35923bc8624a..19e937536917 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go @@ -58,9 +58,8 @@ var ( Type: "insecure", }, } - noopODLBCfg = outlierdetection.LBConfig{ - Interval: 1<<63 - 1, - } + noopODLBCfg = outlierdetection.LBConfig{} + noopODLBCfgJSON, _ = json.Marshal(noopODLBCfg) wrrLocalityLBConfig = &internalserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ @@ -166,7 +165,11 @@ func (tb *testEDSBalancer) waitForClientConnUpdate(ctx context.Context, wantCCS if xdsclient.FromResolverState(gotCCS.ResolverState) == nil { return fmt.Errorf("want resolver state with XDSClient attached, got one without") } - if diff := cmp.Diff(gotCCS, wantCCS, cmpopts.IgnoreFields(resolver.State{}, "Attributes")); diff != "" { + + // Calls into Cluster Resolver LB Config Equal(), which ignores JSON + // configuration but compares the Parsed Configuration of the JSON fields + // emitted from ParseConfig() on the cluster resolver. + if diff := cmp.Diff(gotCCS, wantCCS, cmpopts.IgnoreFields(resolver.State{}, "Attributes"), cmp.AllowUnexported(clusterresolver.LBConfig{})); diff != "" { return fmt.Errorf("received unexpected ClientConnState, diff (-got +want): %v", diff) } return nil @@ -229,9 +232,26 @@ func cdsCCS(cluster string, xdsC xdsclient.XDSClient) balancer.ClientConnState { } } -// edsCCS is a helper function to construct a good update passed from the -// cdsBalancer to the edsBalancer. -func edsCCS(service string, countMax *uint32, enableLRS bool, xdslbpolicy *internalserviceconfig.BalancerConfig, odConfig outlierdetection.LBConfig) balancer.ClientConnState { +// edsCCS is a helper function to construct a Client Conn update which +// represents what the CDS Balancer passes to the Cluster Resolver. It calls +// into Cluster Resolver's ParseConfig to get the service config to fill out the +// Client Conn State. This is to fill out unexported parts of the Cluster +// Resolver config struct. Returns an empty Client Conn State if it encounters +// an error building out the Client Conn State. +func edsCCS(service string, countMax *uint32, enableLRS bool, xdslbpolicy json.RawMessage, odConfig json.RawMessage) balancer.ClientConnState { + builder := balancer.Get(clusterresolver.Name) + if builder == nil { + // Shouldn't happen, registered through imported Cluster Resolver, + // defensive programming. + logger.Errorf("%q LB policy is needed but not registered", clusterresolver.Name) + return balancer.ClientConnState{} // will fail the calling test eventually through error in diff. + } + crParser, ok := builder.(balancer.ConfigParser) + if !ok { + // Shouldn't happen, imported Cluster Resolver builder has this method. + logger.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name) + return balancer.ClientConnState{} + } discoveryMechanism := clusterresolver.DiscoveryMechanism{ Type: clusterresolver.DiscoveryMechanismTypeEDS, Cluster: service, @@ -246,8 +266,21 @@ func edsCCS(service string, countMax *uint32, enableLRS bool, xdslbpolicy *inter XDSLBPolicy: xdslbpolicy, } + crLBCfgJSON, err := json.Marshal(lbCfg) + if err != nil { + // Shouldn't happen, since we just prepared struct. + logger.Errorf("cds_balancer: error marshalling prepared config: %v", lbCfg) + return balancer.ClientConnState{} + } + + var sc serviceconfig.LoadBalancingConfig + if sc, err = crParser.ParseConfig(crLBCfgJSON); err != nil { + logger.Errorf("cds_balancer: cluster_resolver config generated %v is invalid: %v", crLBCfgJSON, err) + return balancer.ClientConnState{} + } + return balancer.ClientConnState{ - BalancerConfig: lbCfg, + BalancerConfig: sc, } } @@ -402,7 +435,7 @@ func (s) TestHandleClusterUpdate(t *testing.T) { LRSServerConfig: xdsresource.ClusterLRSServerSelf, LBPolicy: wrrLocalityLBConfigJSON, }, - wantCCS: edsCCS(serviceName, nil, true, wrrLocalityLBConfig, noopODLBCfg), + wantCCS: edsCCS(serviceName, nil, true, wrrLocalityLBConfigJSON, noopODLBCfgJSON), }, { name: "happy-case-without-lrs", @@ -410,7 +443,7 @@ func (s) TestHandleClusterUpdate(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, }, - wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg), + wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON), }, { name: "happy-case-with-ring-hash-lb-policy", @@ -418,49 +451,64 @@ func (s) TestHandleClusterUpdate(t *testing.T) { ClusterName: serviceName, LBPolicy: ringHashLBConfigJSON, }, - wantCCS: edsCCS(serviceName, nil, false, &internalserviceconfig.BalancerConfig{ - Name: ringhash.Name, - Config: &ringhash.LBConfig{MinRingSize: 10, MaxRingSize: 100}, - }, noopODLBCfg), + wantCCS: edsCCS(serviceName, nil, false, ringHashLBConfigJSON, noopODLBCfgJSON), }, { - name: "happy-case-outlier-detection", + name: "happy-case-outlier-detection-xds-defaults", + // i.e. od proto set but no proto fields set cdsUpdate: xdsresource.ClusterUpdate{ ClusterName: serviceName, - OutlierDetection: &xdsresource.OutlierDetection{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, - MaxEjectionPercent: 10, - SuccessRateStdevFactor: 1900, - EnforcingSuccessRate: 100, - SuccessRateMinimumHosts: 5, - SuccessRateRequestVolume: 100, - FailurePercentageThreshold: 85, - EnforcingFailurePercentage: 5, - FailurePercentageMinimumHosts: 5, - FailurePercentageRequestVolume: 50, - }, + OutlierDetection: json.RawMessage(`{ + "successRateEjection": {} + }`), LBPolicy: wrrLocalityLBConfigJSON, }, - wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfig, outlierdetection.LBConfig{ - Interval: internalserviceconfig.Duration(10 * time.Second), - BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), - MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), - MaxEjectionPercent: 10, - SuccessRateEjection: &outlierdetection.SuccessRateEjection{ - StdevFactor: 1900, - EnforcementPercentage: 100, - MinimumHosts: 5, - RequestVolume: 100, + wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, json.RawMessage(`{ + "successRateEjection": {} + }`)), + }, + { + name: "happy-case-outlier-detection-all-fields-set", + cdsUpdate: xdsresource.ClusterUpdate{ + ClusterName: serviceName, + OutlierDetection: json.RawMessage(`{ + "interval": "10s", + "baseEjectionTime": "30s", + "maxEjectionTime": "300s", + "maxEjectionPercent": 10, + "successRateEjection": { + "stdevFactor": 1900, + "enforcementPercentage": 100, + "minimumHosts": 5, + "requestVolume": 100 }, - FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ - Threshold: 85, - EnforcementPercentage: 5, - MinimumHosts: 5, - RequestVolume: 50, + "failurePercentageEjection": { + "threshold": 85, + "enforcementPercentage": 5, + "minimumHosts": 5, + "requestVolume": 50 + } + }`), + LBPolicy: wrrLocalityLBConfigJSON, + }, + wantCCS: edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, json.RawMessage(`{ + "interval": "10s", + "baseEjectionTime": "30s", + "maxEjectionTime": "300s", + "maxEjectionPercent": 10, + "successRateEjection": { + "stdevFactor": 1900, + "enforcementPercentage": 100, + "minimumHosts": 5, + "requestVolume": 100 }, - }), + "failurePercentageEjection": { + "threshold": 85, + "enforcementPercentage": 5, + "minimumHosts": 5, + "requestVolume": 50 + } + }`)), }, } @@ -531,7 +579,7 @@ func (s) TestHandleClusterUpdateError(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -619,7 +667,7 @@ func (s) TestResolverError(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -671,7 +719,7 @@ func (s) TestUpdateSubConnState(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -709,7 +757,7 @@ func (s) TestCircuitBreaking(t *testing.T) { MaxRequests: &maxRequests, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(clusterName, &maxRequests, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(clusterName, &maxRequests, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -746,7 +794,7 @@ func (s) TestClose(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -820,7 +868,7 @@ func (s) TestExitIdle(t *testing.T) { ClusterName: serviceName, LBPolicy: wrrLocalityLBConfigJSON, } - wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfig, noopODLBCfg) + wantCCS := edsCCS(serviceName, nil, false, wrrLocalityLBConfigJSON, noopODLBCfgJSON) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -882,130 +930,3 @@ func (s) TestParseConfig(t *testing.T) { }) } } - -func (s) TestOutlierDetectionToConfig(t *testing.T) { - tests := []struct { - name string - od *xdsresource.OutlierDetection - odLBCfgWant outlierdetection.LBConfig - }{ - // "if the outlier_detection field is not set in the Cluster resource, - // a "no-op" outlier_detection config will be generated in the - // corresponding DiscoveryMechanism config, with interval set to the - // maximum possible value and all other fields unset." - A50 - { - name: "no-op-outlier-detection-config", - od: nil, - odLBCfgWant: noopODLBCfg, - }, - // "if the enforcing_success_rate field is set to 0, the config - // success_rate_ejection field will be null and all success_rate_* - // fields will be ignored." - A50 - { - name: "enforcing-success-rate-zero", - od: &xdsresource.OutlierDetection{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, - MaxEjectionPercent: 10, - SuccessRateStdevFactor: 1900, - EnforcingSuccessRate: 0, - SuccessRateMinimumHosts: 5, - SuccessRateRequestVolume: 100, - FailurePercentageThreshold: 85, - EnforcingFailurePercentage: 5, - FailurePercentageMinimumHosts: 5, - FailurePercentageRequestVolume: 50, - }, - odLBCfgWant: outlierdetection.LBConfig{ - Interval: internalserviceconfig.Duration(10 * time.Second), - BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), - MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), - MaxEjectionPercent: 10, - SuccessRateEjection: nil, - FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ - Threshold: 85, - EnforcementPercentage: 5, - MinimumHosts: 5, - RequestVolume: 50, - }, - }, - }, - // "If the enforcing_failure_percent field is set to 0 or null, the - // config failure_percent_ejection field will be null and all - // failure_percent_* fields will be ignored." - A50 - { - name: "enforcing-failure-percentage-zero", - od: &xdsresource.OutlierDetection{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, - MaxEjectionPercent: 10, - SuccessRateStdevFactor: 1900, - EnforcingSuccessRate: 100, - SuccessRateMinimumHosts: 5, - SuccessRateRequestVolume: 100, - FailurePercentageThreshold: 85, - EnforcingFailurePercentage: 0, - FailurePercentageMinimumHosts: 5, - FailurePercentageRequestVolume: 50, - }, - odLBCfgWant: outlierdetection.LBConfig{ - Interval: internalserviceconfig.Duration(10 * time.Second), - BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), - MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), - MaxEjectionPercent: 10, - SuccessRateEjection: &outlierdetection.SuccessRateEjection{ - StdevFactor: 1900, - EnforcementPercentage: 100, - MinimumHosts: 5, - RequestVolume: 100, - }, - FailurePercentageEjection: nil, - }, - }, - { - name: "normal-conversion", - od: &xdsresource.OutlierDetection{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, - MaxEjectionPercent: 10, - SuccessRateStdevFactor: 1900, - EnforcingSuccessRate: 100, - SuccessRateMinimumHosts: 5, - SuccessRateRequestVolume: 100, - FailurePercentageThreshold: 85, - EnforcingFailurePercentage: 5, - FailurePercentageMinimumHosts: 5, - FailurePercentageRequestVolume: 50, - }, - odLBCfgWant: outlierdetection.LBConfig{ - Interval: internalserviceconfig.Duration(10 * time.Second), - BaseEjectionTime: internalserviceconfig.Duration(30 * time.Second), - MaxEjectionTime: internalserviceconfig.Duration(300 * time.Second), - MaxEjectionPercent: 10, - SuccessRateEjection: &outlierdetection.SuccessRateEjection{ - StdevFactor: 1900, - EnforcementPercentage: 100, - MinimumHosts: 5, - RequestVolume: 100, - }, - FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ - Threshold: 85, - EnforcementPercentage: 5, - MinimumHosts: 5, - RequestVolume: 50, - }, - }, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - odLBCfgGot := outlierDetectionToConfig(test.od) - if diff := cmp.Diff(odLBCfgGot, test.odLBCfgWant); diff != "" { - t.Fatalf("outlierDetectionToConfig(%v) (-want, +got):\n%s", test.od, diff) - } - }) - } -} diff --git a/xds/internal/balancer/clusterresolver/clusterresolver.go b/xds/internal/balancer/clusterresolver/clusterresolver.go index 18dac2596d0a..f79f06d7b07c 100644 --- a/xds/internal/balancer/clusterresolver/clusterresolver.go +++ b/xds/internal/balancer/clusterresolver/clusterresolver.go @@ -25,21 +25,20 @@ import ( "encoding/json" "errors" "fmt" - "strings" "google.golang.org/grpc/attributes" "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/base" - "google.golang.org/grpc/balancer/roundrobin" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/buffer" + "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpclog" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" + "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/priority" - "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -99,15 +98,52 @@ func (bb) Name() string { return Name } -func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { - var cfg LBConfig - if err := json.Unmarshal(c, &cfg); err != nil { - return nil, fmt.Errorf("unable to unmarshal balancer config %s into cluster-resolver config, error: %v", string(c), err) +func (bb) ParseConfig(j json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { + odBuilder := balancer.Get(outlierdetection.Name) + if odBuilder == nil { + // Shouldn't happen, registered through imported Outlier Detection, + // defensive programming. + return nil, fmt.Errorf("%q LB policy is needed but not registered", outlierdetection.Name) } - if lbp := cfg.XDSLBPolicy; lbp != nil && !strings.EqualFold(lbp.Name, roundrobin.Name) && !strings.EqualFold(lbp.Name, ringhash.Name) { - return nil, fmt.Errorf("unsupported child policy with name %q, not one of {%q,%q}", lbp.Name, roundrobin.Name, ringhash.Name) + odParser, ok := odBuilder.(balancer.ConfigParser) + if !ok { + // Shouldn't happen, imported Outlier Detection builder has this method. + return nil, fmt.Errorf("%q LB policy does not implement a config parser", outlierdetection.Name) + } + + var cfg *LBConfig + if err := json.Unmarshal(j, &cfg); err != nil { + return nil, fmt.Errorf("unable to unmarshal balancer config %s into cluster-resolver config, error: %v", string(j), err) + } + + odCfgs := make([]outlierdetection.LBConfig, len(cfg.DiscoveryMechanisms)) + for i, dm := range cfg.DiscoveryMechanisms { + lbCfg, err := odParser.ParseConfig(dm.OutlierDetection) + if err != nil { + return nil, fmt.Errorf("error parsing Outlier Detection config: %v", dm.OutlierDetection) + } + odCfg, ok := lbCfg.(*outlierdetection.LBConfig) + if !ok { + // Shouldn't happen, Parser built at build time with Outlier Detection + // builder pulled from gRPC LB Registry. + return nil, fmt.Errorf("odParser returned config with unexpected type %T: %v", lbCfg, lbCfg) + } + odCfgs[i] = *odCfg + } + if envconfig.XDSOutlierDetection { + for i, odCfg := range odCfgs { + cfg.DiscoveryMechanisms[i].outlierDetection = odCfg + } + } + if err := json.Unmarshal(cfg.XDSLBPolicy, &cfg.xdsLBPolicy); err != nil { + // This will never occur, valid configuration is emitted from the xDS + // Client. Validity is already checked in the xDS Client, however, this + // double validation is present because Unmarshalling and Validating are + // coupled into one json.Unmarshal operation). We will switch this in + // the future to two separate operations. + return nil, fmt.Errorf("error unmarshaling xDS LB Policy: %v", err) } - return &cfg, nil + return cfg, nil } // ccUpdate wraps a clientConn update received from gRPC. @@ -208,7 +244,7 @@ func (b *clusterResolverBalancer) updateChildConfig() { b.child = newChildBalancer(b.priorityBuilder, b.cc, b.bOpts) } - childCfgBytes, addrs, err := buildPriorityConfigJSON(b.priorities, b.config.XDSLBPolicy) + childCfgBytes, addrs, err := buildPriorityConfigJSON(b.priorities, &b.config.xdsLBPolicy) if err != nil { b.logger.Warningf("Failed to build child policy config: %v", err) return diff --git a/xds/internal/balancer/clusterresolver/clusterresolver_test.go b/xds/internal/balancer/clusterresolver/clusterresolver_test.go index 65cb7a9bf981..6d798a1543b0 100644 --- a/xds/internal/balancer/clusterresolver/clusterresolver_test.go +++ b/xds/internal/balancer/clusterresolver/clusterresolver_test.go @@ -29,7 +29,7 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal/grpctest" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/resolver" xdsinternal "google.golang.org/grpc/xds/internal" @@ -325,12 +325,16 @@ func newLBConfigWithOneEDS(edsServiceName string) *LBConfig { Type: DiscoveryMechanismTypeEDS, EDSServiceName: edsServiceName, }}, + xdsLBPolicy: iserviceconfig.BalancerConfig{ + Name: "ROUND_ROBIN", + Config: nil, + }, } } func newLBConfigWithOneEDSAndOutlierDetection(edsServiceName string, odCfg outlierdetection.LBConfig) *LBConfig { lbCfg := newLBConfigWithOneEDS(edsServiceName) - lbCfg.DiscoveryMechanisms[0].OutlierDetection = odCfg + lbCfg.DiscoveryMechanisms[0].outlierDetection = odCfg return lbCfg } @@ -381,15 +385,22 @@ func (s) TestOutlierDetection(t *testing.T) { pCfgWant := &priority.LBConfig{ Children: map[string]*priority.Child{ "priority-0-0": { - Config: &internalserviceconfig.BalancerConfig{ + Config: &iserviceconfig.BalancerConfig{ Name: outlierdetection.Name, Config: &outlierdetection.LBConfig{ - Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: testClusterName, EDSServiceName: "test-eds-service-name", + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "ROUND_ROBIN", + Config: nil, + }, }, }, }, diff --git a/xds/internal/balancer/clusterresolver/config.go b/xds/internal/balancer/clusterresolver/config.go index 2458b106772f..c67608819185 100644 --- a/xds/internal/balancer/clusterresolver/config.go +++ b/xds/internal/balancer/clusterresolver/config.go @@ -102,11 +102,13 @@ type DiscoveryMechanism struct { DNSHostname string `json:"dnsHostname,omitempty"` // OutlierDetection is the Outlier Detection LB configuration for this // priority. - OutlierDetection outlierdetection.LBConfig `json:"outlierDetection,omitempty"` + OutlierDetection json.RawMessage `json:"outlierDetection,omitempty"` + outlierDetection outlierdetection.LBConfig } // Equal returns whether the DiscoveryMechanism is the same with the parameter. func (dm DiscoveryMechanism) Equal(b DiscoveryMechanism) bool { + od := &dm.outlierDetection switch { case dm.Cluster != b.Cluster: return false @@ -118,7 +120,7 @@ func (dm DiscoveryMechanism) Equal(b DiscoveryMechanism) bool { return false case dm.DNSHostname != b.DNSHostname: return false - case !dm.OutlierDetection.EqualIgnoringChildPolicy(&b.OutlierDetection): + case !od.EqualIgnoringChildPolicy(&b.outlierDetection): return false } @@ -151,16 +153,6 @@ type LBConfig struct { DiscoveryMechanisms []DiscoveryMechanism `json:"discoveryMechanisms,omitempty"` // XDSLBPolicy specifies the policy for locality picking and endpoint picking. - // - // Note that it's not normal balancing policy, and it can only be either - // ROUND_ROBIN or RING_HASH. - // - // For ROUND_ROBIN, the policy name will be "ROUND_ROBIN", and the config - // will be empty. This sets the locality-picking policy to weighted_target - // and the endpoint-picking policy to round_robin. - // - // For RING_HASH, the policy name will be "RING_HASH", and the config will - // be lb config for the ring_hash_experimental LB Policy. ring_hash policy - // is responsible for both locality picking and endpoint picking. - XDSLBPolicy *internalserviceconfig.BalancerConfig `json:"xdsLbPolicy,omitempty"` + XDSLBPolicy json.RawMessage `json:"xdsLbPolicy,omitempty"` + xdsLBPolicy internalserviceconfig.BalancerConfig } diff --git a/xds/internal/balancer/clusterresolver/config_test.go b/xds/internal/balancer/clusterresolver/config_test.go index fd17f3ede6d1..608c17ef78c8 100644 --- a/xds/internal/balancer/clusterresolver/config_test.go +++ b/xds/internal/balancer/clusterresolver/config_test.go @@ -21,10 +21,13 @@ package clusterresolver import ( "encoding/json" "testing" + "time" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "google.golang.org/grpc/balancer" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/xdsclient/bootstrap" ) @@ -101,8 +104,10 @@ const ( }, "maxConcurrentRequests": 314, "type": "EDS", - "edsServiceName": "test-eds-service-name" - }] + "edsServiceName": "test-eds-service-name", + "outlierDetection": {} + }], + "xdsLbPolicy":[{"ROUND_ROBIN":{}}] }` testJSONConfig2 = `{ "discoveryMechanisms": [{ @@ -113,10 +118,13 @@ const ( }, "maxConcurrentRequests": 314, "type": "EDS", - "edsServiceName": "test-eds-service-name" + "edsServiceName": "test-eds-service-name", + "outlierDetection": {} },{ - "type": "LOGICAL_DNS" - }] + "type": "LOGICAL_DNS", + "outlierDetection": {} + }], + "xdsLbPolicy":[{"ROUND_ROBIN":{}}] }` testJSONConfig3 = `{ "discoveryMechanisms": [{ @@ -127,7 +135,8 @@ const ( }, "maxConcurrentRequests": 314, "type": "EDS", - "edsServiceName": "test-eds-service-name" + "edsServiceName": "test-eds-service-name", + "outlierDetection": {} }], "xdsLbPolicy":[{"ROUND_ROBIN":{}}] }` @@ -140,7 +149,8 @@ const ( }, "maxConcurrentRequests": 314, "type": "EDS", - "edsServiceName": "test-eds-service-name" + "edsServiceName": "test-eds-service-name", + "outlierDetection": {} }], "xdsLbPolicy":[{"ring_hash_experimental":{}}] }` @@ -153,9 +163,10 @@ const ( }, "maxConcurrentRequests": 314, "type": "EDS", - "edsServiceName": "test-eds-service-name" + "edsServiceName": "test-eds-service-name", + "outlierDetection": {} }], - "xdsLbPolicy":[{"pick_first":{}}] + "xdsLbPolicy":[{"ROUND_ROBIN":{}}] }` ) @@ -190,9 +201,19 @@ func TestParseConfig(t *testing.T) { MaxConcurrentRequests: newUint32(testMaxRequests), Type: DiscoveryMechanismTypeEDS, EDSServiceName: testEDSService, + outlierDetection: outlierdetection.LBConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + // sre and fpe are both nil + }, }, }, - XDSLBPolicy: nil, + xdsLBPolicy: iserviceconfig.BalancerConfig{ // do we want to make this not pointer + Name: "ROUND_ROBIN", + Config: nil, + }, }, wantErr: false, }, @@ -207,12 +228,29 @@ func TestParseConfig(t *testing.T) { MaxConcurrentRequests: newUint32(testMaxRequests), Type: DiscoveryMechanismTypeEDS, EDSServiceName: testEDSService, + outlierDetection: outlierdetection.LBConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + // sre and fpe are both nil + }, }, { Type: DiscoveryMechanismTypeLogicalDNS, + outlierDetection: outlierdetection.LBConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + // sre and fpe are both nil + }, }, }, - XDSLBPolicy: nil, + xdsLBPolicy: iserviceconfig.BalancerConfig{ + Name: "ROUND_ROBIN", + Config: nil, + }, }, wantErr: false, }, @@ -227,9 +265,16 @@ func TestParseConfig(t *testing.T) { MaxConcurrentRequests: newUint32(testMaxRequests), Type: DiscoveryMechanismTypeEDS, EDSServiceName: testEDSService, + outlierDetection: outlierdetection.LBConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + // sre and fpe are both nil + }, }, }, - XDSLBPolicy: &internalserviceconfig.BalancerConfig{ + xdsLBPolicy: iserviceconfig.BalancerConfig{ Name: "ROUND_ROBIN", Config: nil, }, @@ -247,9 +292,16 @@ func TestParseConfig(t *testing.T) { MaxConcurrentRequests: newUint32(testMaxRequests), Type: DiscoveryMechanismTypeEDS, EDSServiceName: testEDSService, + outlierDetection: outlierdetection.LBConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + // sre and fpe are both nil + }, }, }, - XDSLBPolicy: &internalserviceconfig.BalancerConfig{ + xdsLBPolicy: iserviceconfig.BalancerConfig{ Name: ringhash.Name, Config: &ringhash.LBConfig{MinRingSize: 1024, MaxRingSize: 4096}, // Ringhash LB config with default min and max. }, @@ -257,9 +309,31 @@ func TestParseConfig(t *testing.T) { wantErr: false, }, { - name: "unsupported picking policy", - js: testJSONConfig5, - wantErr: true, + name: "noop-outlier-detection", + js: testJSONConfig5, + want: &LBConfig{ + DiscoveryMechanisms: []DiscoveryMechanism{ + { + Cluster: testClusterName, + LoadReportingServer: testLRSServerConfig, + MaxConcurrentRequests: newUint32(testMaxRequests), + Type: DiscoveryMechanismTypeEDS, + EDSServiceName: testEDSService, + outlierDetection: outlierdetection.LBConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + // sre and fpe are both nil + }, + }, + }, + xdsLBPolicy: iserviceconfig.BalancerConfig{ + Name: "ROUND_ROBIN", + Config: nil, + }, + }, + wantErr: false, }, } for _, tt := range tests { @@ -279,7 +353,7 @@ func TestParseConfig(t *testing.T) { if tt.wantErr { return } - if diff := cmp.Diff(got, tt.want); diff != "" { + if diff := cmp.Diff(got, tt.want, cmp.AllowUnexported(LBConfig{}), cmpopts.IgnoreFields(LBConfig{}, "XDSLBPolicy")); diff != "" { t.Errorf("parseConfig() got unexpected output, diff (-got +want): %v", diff) } }) diff --git a/xds/internal/balancer/clusterresolver/configbuilder.go b/xds/internal/balancer/clusterresolver/configbuilder.go index 06b0aec2f311..4b83dfb2bfa0 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder.go +++ b/xds/internal/balancer/clusterresolver/configbuilder.go @@ -100,7 +100,7 @@ func buildPriorityConfig(priorities []priorityConfig, xdsLBPolicy *internalservi retAddrs = append(retAddrs, addrs...) var odCfgs map[string]*outlierdetection.LBConfig if envconfig.XDSOutlierDetection { - odCfgs = convertClusterImplMapToOutlierDetection(configs, p.mechanism.OutlierDetection) + odCfgs = convertClusterImplMapToOutlierDetection(configs, p.mechanism.outlierDetection) for n, c := range odCfgs { retConfig.Children[n] = &priority.Child{ Config: &internalserviceconfig.BalancerConfig{Name: outlierdetection.Name, Config: c}, @@ -124,7 +124,7 @@ func buildPriorityConfig(priorities []priorityConfig, xdsLBPolicy *internalservi retAddrs = append(retAddrs, addrs...) var odCfg *outlierdetection.LBConfig if envconfig.XDSOutlierDetection { - odCfg = makeClusterImplOutlierDetectionChild(config, p.mechanism.OutlierDetection) + odCfg = makeClusterImplOutlierDetectionChild(config, p.mechanism.outlierDetection) retConfig.Children[name] = &priority.Child{ Config: &internalserviceconfig.BalancerConfig{Name: outlierdetection.Name, Config: odCfg}, // Not ignore re-resolution from DNS children, they will trigger diff --git a/xds/internal/balancer/clusterresolver/configbuilder_test.go b/xds/internal/balancer/clusterresolver/configbuilder_test.go index 6c94cae9ed47..b30686b18561 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder_test.go +++ b/xds/internal/balancer/clusterresolver/configbuilder_test.go @@ -24,6 +24,7 @@ import ( "fmt" "sort" "testing" + "time" "github.com/google/go-cmp/cmp" "google.golang.org/grpc/attributes" @@ -31,7 +32,7 @@ import ( "google.golang.org/grpc/balancer/roundrobin" "google.golang.org/grpc/balancer/weightedroundrobin" "google.golang.org/grpc/internal/hierarchy" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/resolver" "google.golang.org/grpc/xds/internal" "google.golang.org/grpc/xds/internal/balancer/clusterimpl" @@ -72,7 +73,10 @@ var ( } noopODCfg = outlierdetection.LBConfig{ - Interval: 1<<63 - 1, + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, } ) @@ -194,7 +198,7 @@ func TestBuildPriorityConfig(t *testing.T) { Cluster: testClusterName, Type: DiscoveryMechanismTypeEDS, EDSServiceName: testEDSServiceName, - OutlierDetection: noopODCfg, + outlierDetection: noopODCfg, }, edsResp: xdsresource.EndpointsUpdate{ Localities: []xdsresource.Locality{ @@ -211,7 +215,7 @@ func TestBuildPriorityConfig(t *testing.T) { mechanism: DiscoveryMechanism{ Cluster: testClusterName2, Type: DiscoveryMechanismTypeLogicalDNS, - OutlierDetection: noopODCfg, + outlierDetection: noopODCfg, }, addresses: testAddressStrs[4], childNameGen: newNameGenerator(1), @@ -221,11 +225,14 @@ func TestBuildPriorityConfig(t *testing.T) { wantConfig := &priority.LBConfig{ Children: map[string]*priority.Child{ "priority-0-0": { - Config: &internalserviceconfig.BalancerConfig{ + Config: &iserviceconfig.BalancerConfig{ Name: outlierdetection.Name, Config: &outlierdetection.LBConfig{ - Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: testClusterName, @@ -238,11 +245,14 @@ func TestBuildPriorityConfig(t *testing.T) { IgnoreReresolutionRequests: true, }, "priority-0-1": { - Config: &internalserviceconfig.BalancerConfig{ + Config: &iserviceconfig.BalancerConfig{ Name: outlierdetection.Name, Config: &outlierdetection.LBConfig{ - Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: testClusterName, @@ -255,15 +265,18 @@ func TestBuildPriorityConfig(t *testing.T) { IgnoreReresolutionRequests: true, }, "priority-1": { - Config: &internalserviceconfig.BalancerConfig{ + Config: &iserviceconfig.BalancerConfig{ Name: outlierdetection.Name, Config: &outlierdetection.LBConfig{ - Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + Interval: iserviceconfig.Duration(10 * time.Second), // default interval + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: testClusterName2, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: "pick_first"}, + ChildPolicy: &iserviceconfig.BalancerConfig{Name: "pick_first"}, }, }, }, @@ -283,7 +296,7 @@ func TestBuildClusterImplConfigForDNS(t *testing.T) { wantName := "priority-3" wantConfig := &clusterimpl.LBConfig{ Cluster: testClusterName2, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "pick_first", }, } @@ -500,7 +513,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { localities []xdsresource.Locality priorityName string mechanism DiscoveryMechanism - childPolicy *internalserviceconfig.BalancerConfig + childPolicy *iserviceconfig.BalancerConfig wantConfig *clusterimpl.LBConfig wantAddrs []resolver.Address wantErr bool @@ -525,7 +538,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { }, }, priorityName: "test-priority", - childPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, + childPolicy: &iserviceconfig.BalancerConfig{Name: roundrobin.Name}, mechanism: DiscoveryMechanism{ Cluster: testClusterName, Type: DiscoveryMechanismTypeEDS, @@ -535,7 +548,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { wantConfig: &clusterimpl.LBConfig{ Cluster: testClusterName, EDSServiceName: testEDSService, - ChildPolicy: &internalserviceconfig.BalancerConfig{Name: roundrobin.Name}, + ChildPolicy: &iserviceconfig.BalancerConfig{Name: roundrobin.Name}, }, wantAddrs: []resolver.Address{ testAddrWithAttrs("addr-1-1", 20, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), @@ -565,10 +578,10 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { }, }, priorityName: "test-priority", - childPolicy: &internalserviceconfig.BalancerConfig{Name: ringhash.Name, Config: &ringhash.LBConfig{MinRingSize: 1, MaxRingSize: 2}}, + childPolicy: &iserviceconfig.BalancerConfig{Name: ringhash.Name, Config: &ringhash.LBConfig{MinRingSize: 1, MaxRingSize: 2}}, // lrsServer is nil, so LRS policy will not be used. wantConfig: &clusterimpl.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: ringhash.Name, Config: &ringhash.LBConfig{MinRingSize: 1, MaxRingSize: 2}, }, @@ -638,7 +651,7 @@ func TestConvertClusterImplMapToOutlierDetection(t *testing.T) { wantODCfgs: map[string]*outlierdetection.LBConfig{ "child1": { Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: "cluster1", @@ -663,7 +676,7 @@ func TestConvertClusterImplMapToOutlierDetection(t *testing.T) { wantODCfgs: map[string]*outlierdetection.LBConfig{ "child1": { Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: "cluster1", @@ -672,7 +685,7 @@ func TestConvertClusterImplMapToOutlierDetection(t *testing.T) { }, "child2": { Interval: 1<<63 - 1, - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: clusterimpl.Name, Config: &clusterimpl.LBConfig{ Cluster: "cluster2", diff --git a/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go b/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go index c7c2ab9945f0..0be84f7b74fb 100644 --- a/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go +++ b/xds/internal/balancer/clusterresolver/e2e_test/eds_impl_test.go @@ -193,7 +193,8 @@ func (s) TestEDS_OneLocality(t *testing.T) { "discoveryMechanisms": [{ "cluster": "%s", "type": "EDS", - "edsServiceName": "%s" + "edsServiceName": "%s", + "outlierDetection": {} }], "xdsLbPolicy":[{"round_robin":{}}] } @@ -301,7 +302,8 @@ func (s) TestEDS_MultipleLocalities(t *testing.T) { "discoveryMechanisms": [{ "cluster": "%s", "type": "EDS", - "edsServiceName": "%s" + "edsServiceName": "%s", + "outlierDetection": {} }], "xdsLbPolicy":[{"round_robin":{}}] } @@ -422,7 +424,8 @@ func (s) TestEDS_EndpointsHealth(t *testing.T) { "discoveryMechanisms": [{ "cluster": "%s", "type": "EDS", - "edsServiceName": "%s" + "edsServiceName": "%s", + "outlierDetection": {} }], "xdsLbPolicy":[{"round_robin":{}}] } @@ -488,7 +491,8 @@ func (s) TestEDS_EmptyUpdate(t *testing.T) { "discoveryMechanisms": [{ "cluster": "%s", "type": "EDS", - "edsServiceName": "%s" + "edsServiceName": "%s", + "outlierDetection": {} }], "xdsLbPolicy":[{"round_robin":{}}] } diff --git a/xds/internal/balancer/clusterresolver/priority_test.go b/xds/internal/balancer/clusterresolver/priority_test.go index 68325a31c17e..4d2904c67ff6 100644 --- a/xds/internal/balancer/clusterresolver/priority_test.go +++ b/xds/internal/balancer/clusterresolver/priority_test.go @@ -85,7 +85,7 @@ func setupTestEDS(t *testing.T, initChild *internalserviceconfig.BalancerConfig) Cluster: testClusterName, Type: DiscoveryMechanismTypeEDS, }}, - XDSLBPolicy: wrrLocalityLBConfig, + xdsLBPolicy: *wrrLocalityLBConfig, }, }); err != nil { edsb.Close() @@ -855,7 +855,7 @@ func (s) TestFallbackToDNS(t *testing.T) { DNSHostname: testDNSTarget, }, }, - XDSLBPolicy: wrrLocalityLBConfig, + xdsLBPolicy: *wrrLocalityLBConfig, }, }); err != nil { t.Fatal(err) diff --git a/xds/internal/balancer/outlierdetection/balancer.go b/xds/internal/balancer/outlierdetection/balancer.go index 548514f6d05d..eaf4f7fc9ab7 100644 --- a/xds/internal/balancer/outlierdetection/balancer.go +++ b/xds/internal/balancer/outlierdetection/balancer.go @@ -23,7 +23,6 @@ package outlierdetection import ( "encoding/json" - "errors" "fmt" "math" "strings" @@ -41,6 +40,7 @@ import ( "google.golang.org/grpc/internal/grpclog" "google.golang.org/grpc/internal/grpcrand" "google.golang.org/grpc/internal/grpcsync" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" ) @@ -81,19 +81,27 @@ func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Ba } func (bb) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { - var lbCfg *LBConfig - if err := json.Unmarshal(s, &lbCfg); err != nil { // Validates child config if present as well. + lbCfg := &LBConfig{ + // Default top layer values as documented in A50. + Interval: iserviceconfig.Duration(10 * time.Second), + BaseEjectionTime: iserviceconfig.Duration(30 * time.Second), + MaxEjectionTime: iserviceconfig.Duration(300 * time.Second), + MaxEjectionPercent: 10, + } + + // This unmarshalling handles underlying layers sre and fpe which have their + // own defaults for their fields if either sre or fpe are present. + if err := json.Unmarshal(s, lbCfg); err != nil { // Validates child config if present as well. return nil, fmt.Errorf("xds: unable to unmarshal LBconfig: %s, error: %v", string(s), err) } // Note: in the xds flow, these validations will never fail. The xdsclient // performs the same validations as here on the xds Outlier Detection - // resource before parsing into the internal struct which gets marshaled - // into JSON before calling this function. A50 defines two separate places - // for these validations to take place, the xdsclient and this ParseConfig - // method. "When parsing a config from JSON, if any of these requirements is - // violated, that should be treated as a parsing error." - A50 - + // resource before parsing resource into JSON which this function gets + // called with. A50 defines two separate places for these validations to + // take place, the xdsclient and this ParseConfig method. "When parsing a + // config from JSON, if any of these requirements is violated, that should + // be treated as a parsing error." - A50 switch { // "The google.protobuf.Duration fields interval, base_ejection_time, and // max_ejection_time must obey the restrictions in the @@ -122,10 +130,7 @@ func (bb) ParseConfig(s json.RawMessage) (serviceconfig.LoadBalancingConfig, err return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.threshold = %v; must be <= 100", lbCfg.FailurePercentageEjection.Threshold) case lbCfg.FailurePercentageEjection != nil && lbCfg.FailurePercentageEjection.EnforcementPercentage > 100: return nil, fmt.Errorf("OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.enforcement_percentage = %v; must be <= 100", lbCfg.FailurePercentageEjection.EnforcementPercentage) - case lbCfg.ChildPolicy == nil: - return nil, errors.New("OutlierDetectionLoadBalancingConfig.child_policy must be present") } - return lbCfg, nil } diff --git a/xds/internal/balancer/outlierdetection/balancer_test.go b/xds/internal/balancer/outlierdetection/balancer_test.go index 4f542d61e572..3d1efe8dcd56 100644 --- a/xds/internal/balancer/outlierdetection/balancer_test.go +++ b/xds/internal/balancer/outlierdetection/balancer_test.go @@ -68,7 +68,20 @@ func (s) TestParseConfig(t *testing.T) { }) parser := bb{} - + const ( + defaultInterval = iserviceconfig.Duration(10 * time.Second) + defaultBaseEjectionTime = iserviceconfig.Duration(30 * time.Second) + defaultMaxEjectionTime = iserviceconfig.Duration(300 * time.Second) + defaultMaxEjectionPercent = 10 + defaultSuccessRateStdevFactor = 1900 + defaultEnforcingSuccessRate = 100 + defaultSuccessRateMinimumHosts = 5 + defaultSuccessRateRequestVolume = 100 + defaultFailurePercentageThreshold = 85 + defaultEnforcingFailurePercentage = 0 + defaultFailurePercentageMinimumHosts = 5 + defaultFailurePercentageRequestVolume = 50 + ) tests := []struct { name string input string @@ -76,8 +89,35 @@ func (s) TestParseConfig(t *testing.T) { wantErr string }{ { - name: "noop-lb-config", + name: "no-fields-set-should-get-default", + input: `{ + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + + { + name: "some-top-level-fields-set", input: `{ + "interval": "15s", + "maxEjectionTime": "350s", "childPolicy": [ { "xds_cluster_impl_experimental": { @@ -86,7 +126,184 @@ func (s) TestParseConfig(t *testing.T) { } ] }`, + // Should get set fields + defaults for unset fields. + wantCfg: &LBConfig{ + Interval: iserviceconfig.Duration(15 * time.Second), + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: iserviceconfig.Duration(350 * time.Second), + MaxEjectionPercent: defaultMaxEjectionPercent, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { + name: "success-rate-ejection-present-but-no-fields", + input: `{ + "successRateEjection": {}, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + // Should get defaults of success-rate-ejection struct. + wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + SuccessRateEjection: &SuccessRateEjection{ + StdevFactor: defaultSuccessRateStdevFactor, + EnforcementPercentage: defaultEnforcingSuccessRate, + MinimumHosts: defaultSuccessRateMinimumHosts, + RequestVolume: defaultSuccessRateRequestVolume, + }, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { + name: "success-rate-ejection-present-partially-set", + input: `{ + "successRateEjection": { + "stdevFactor": 1000, + "minimumHosts": 5 + }, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + // Should get set fields + defaults for others in success rate + // ejection layer. + wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + SuccessRateEjection: &SuccessRateEjection{ + StdevFactor: 1000, + EnforcementPercentage: defaultEnforcingSuccessRate, + MinimumHosts: 5, + RequestVolume: defaultSuccessRateRequestVolume, + }, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { + name: "success-rate-ejection-present-fully-set", + input: `{ + "successRateEjection": { + "stdevFactor": 1000, + "enforcementPercentage": 50, + "minimumHosts": 5, + "requestVolume": 50 + }, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + SuccessRateEjection: &SuccessRateEjection{ + StdevFactor: 1000, + EnforcementPercentage: 50, + MinimumHosts: 5, + RequestVolume: 50, + }, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { + name: "failure-percentage-ejection-present-but-no-fields", + input: `{ + "failurePercentageEjection": {}, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + // Should get defaults of failure percentage ejection layer. + wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + FailurePercentageEjection: &FailurePercentageEjection{ + Threshold: defaultFailurePercentageThreshold, + EnforcementPercentage: defaultEnforcingFailurePercentage, + MinimumHosts: defaultFailurePercentageMinimumHosts, + RequestVolume: defaultFailurePercentageRequestVolume, + }, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { + name: "failure-percentage-ejection-present-partially-set", + input: `{ + "failurePercentageEjection": { + "threshold": 80, + "minimumHosts": 10 + }, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + // Should get set fields + defaults for others in success rate + // ejection layer. wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + FailurePercentageEjection: &FailurePercentageEjection{ + Threshold: 80, + EnforcementPercentage: defaultEnforcingFailurePercentage, + MinimumHosts: 10, + RequestVolume: defaultFailurePercentageRequestVolume, + }, ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "xds_cluster_impl_experimental", Config: &clusterimpl.LBConfig{ @@ -96,7 +313,81 @@ func (s) TestParseConfig(t *testing.T) { }, }, { - name: "good-lb-config", + name: "failure-percentage-ejection-present-fully-set", + input: `{ + "failurePercentageEjection": { + "threshold": 80, + "enforcementPercentage": 100, + "minimumHosts": 10, + "requestVolume": 40 + }, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + wantCfg: &LBConfig{ + Interval: defaultInterval, + BaseEjectionTime: defaultBaseEjectionTime, + MaxEjectionTime: defaultMaxEjectionTime, + MaxEjectionPercent: defaultMaxEjectionPercent, + FailurePercentageEjection: &FailurePercentageEjection{ + Threshold: 80, + EnforcementPercentage: 100, + MinimumHosts: 10, + RequestVolume: 40, + }, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { // to make sure zero values aren't overwritten by defaults + name: "lb-config-every-field-set-zero-value", + input: `{ + "interval": "0s", + "baseEjectionTime": "0s", + "maxEjectionTime": "0s", + "maxEjectionPercent": 0, + "successRateEjection": { + "stdevFactor": 0, + "enforcementPercentage": 0, + "minimumHosts": 0, + "requestVolume": 0 + }, + "failurePercentageEjection": { + "threshold": 0, + "enforcementPercentage": 0, + "minimumHosts": 0, + "requestVolume": 0 + }, + "childPolicy": [ + { + "xds_cluster_impl_experimental": { + "cluster": "test_cluster" + } + } + ] + }`, + wantCfg: &LBConfig{ + SuccessRateEjection: &SuccessRateEjection{}, + FailurePercentageEjection: &FailurePercentageEjection{}, + ChildPolicy: &iserviceconfig.BalancerConfig{ + Name: "xds_cluster_impl_experimental", + Config: &clusterimpl.LBConfig{ + Cluster: "test_cluster", + }, + }, + }, + }, + { + name: "lb-config-every-field-set", input: `{ "interval": "10s", "baseEjectionTime": "30s", @@ -194,28 +485,6 @@ func (s) TestParseConfig(t *testing.T) { }`, wantErr: "OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.enforcement_percentage = 150; must be <= 100", }, - { - name: "child-policy-not-present", - input: `{ - "interval": "10s", - "baseEjectionTime": "30s", - "maxEjectionTime": "300s", - "maxEjectionPercent": 10, - "successRateEjection": { - "stdevFactor": 1900, - "enforcementPercentage": 100, - "minimumHosts": 5, - "requestVolume": 100 - }, - "failurePercentageEjection": { - "threshold": 85, - "enforcementPercentage": 5, - "minimumHosts": 5, - "requestVolume": 50 - } - }`, - wantErr: "OutlierDetectionLoadBalancingConfig.child_policy must be present", - }, { name: "child-policy-present-but-parse-error", input: `{ @@ -242,26 +511,6 @@ func (s) TestParseConfig(t *testing.T) { }`, wantErr: "invalid loadBalancingConfig: no supported policies found", }, - { - name: "child-policy", - input: `{ - "childPolicy": [ - { - "xds_cluster_impl_experimental": { - "cluster": "test_cluster" - } - } - ] - }`, - wantCfg: &LBConfig{ - ChildPolicy: &iserviceconfig.BalancerConfig{ - Name: "xds_cluster_impl_experimental", - Config: &clusterimpl.LBConfig{ - Cluster: "test_cluster", - }, - }, - }, - }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { diff --git a/xds/internal/balancer/outlierdetection/config.go b/xds/internal/balancer/outlierdetection/config.go index 9c4383cf6ece..2df0aa231e65 100644 --- a/xds/internal/balancer/outlierdetection/config.go +++ b/xds/internal/balancer/outlierdetection/config.go @@ -18,6 +18,8 @@ package outlierdetection import ( + "encoding/json" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/serviceconfig" ) @@ -52,6 +54,24 @@ type SuccessRateEjection struct { RequestVolume uint32 `json:"requestVolume,omitempty"` } +// For UnmarshalJSON to work correctly and set defaults without infinite +// recursion. +type successRateEjection SuccessRateEjection + +// UnmarshalJSON unmarshals JSON into SuccessRateEjection. If a +// SuccessRateEjection field is not set, that field will get it's default value. +func (sre *SuccessRateEjection) UnmarshalJSON(j []byte) error { + sre.StdevFactor = 1900 + sre.EnforcementPercentage = 100 + sre.MinimumHosts = 5 + sre.RequestVolume = 100 + // Unmarshal JSON on a type with zero values for methods, including + // UnmarshalJSON. Overwrites defaults, leaves alone if not. typecast to + // avoid infinite recursion by not recalling this function and causing stack + // overflow. + return json.Unmarshal(j, (*successRateEjection)(sre)) +} + // Equal returns whether the SuccessRateEjection is the same with the parameter. func (sre *SuccessRateEjection) Equal(sre2 *SuccessRateEjection) bool { if sre == nil && sre2 == nil { @@ -99,6 +119,25 @@ type FailurePercentageEjection struct { RequestVolume uint32 `json:"requestVolume,omitempty"` } +// For UnmarshalJSON to work correctly and set defaults without infinite +// recursion. +type failurePercentageEjection FailurePercentageEjection + +// UnmarshalJSON unmarshals JSON into FailurePercentageEjection. If a +// FailurePercentageEjection field is not set, that field will get it's default +// value. +func (fpe *FailurePercentageEjection) UnmarshalJSON(j []byte) error { + fpe.Threshold = 85 + fpe.EnforcementPercentage = 0 + fpe.MinimumHosts = 5 + fpe.RequestVolume = 50 + // Unmarshal JSON on a type with zero values for methods, including + // UnmarshalJSON. Overwrites defaults, leaves alone if not. typecast to + // avoid infinite recursion by not recalling this function and causing stack + // overflow. + return json.Unmarshal(j, (*failurePercentageEjection)(fpe)) +} + // Equal returns whether the FailurePercentageEjection is the same with the // parameter. func (fpe *FailurePercentageEjection) Equal(fpe2 *FailurePercentageEjection) bool { diff --git a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go index afa418815a0b..e8665925739b 100644 --- a/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/tests/unmarshal_cds_test.go @@ -29,7 +29,7 @@ import ( "google.golang.org/grpc/internal/balancer/stub" "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpctest" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/serviceconfig" _ "google.golang.org/grpc/xds" // Register the xDS LB Registry Converters. @@ -107,7 +107,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { name string cluster *v3clusterpb.Cluster wantUpdate xdsresource.ClusterUpdate - wantLBConfig *internalserviceconfig.BalancerConfig + wantLBConfig *iserviceconfig.BalancerConfig customLBDisabled bool }{ { @@ -142,10 +142,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { ClusterType: xdsresource.ClusterTypeLogicalDNS, DNSHostName: "dns_host:8080", }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -169,10 +169,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { ClusterName: clusterName, LRSServerConfig: xdsresource.ClusterLRSOff, ClusterType: xdsresource.ClusterTypeAggregate, PrioritizedClusterNames: []string{"a", "b", "c"}, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -193,10 +193,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, }, wantUpdate: emptyUpdate, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -218,10 +218,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { LbPolicy: v3clusterpb.Cluster_ROUND_ROBIN, }, wantUpdate: xdsresource.ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSOff}, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -248,10 +248,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { }, }, wantUpdate: xdsresource.ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf}, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -290,10 +290,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { }, }, wantUpdate: xdsresource.ClusterUpdate{ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf, MaxRequests: func() *uint32 { i := uint32(512); return &i }()}, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -322,7 +322,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: "ring_hash_experimental", Config: &ringhash.LBConfig{ MinRingSize: 1024, @@ -359,7 +359,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: "ring_hash_experimental", Config: &ringhash.LBConfig{ MinRingSize: 10, @@ -397,7 +397,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: "ring_hash_experimental", Config: &ringhash.LBConfig{ MinRingSize: 10, @@ -431,10 +431,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "round_robin", }, }, @@ -469,10 +469,10 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: wrrlocality.Name, Config: &wrrlocality.LBConfig{ - ChildPolicy: &internalserviceconfig.BalancerConfig{ + ChildPolicy: &iserviceconfig.BalancerConfig{ Name: "myorg.MyCustomLeastRequestPolicy", Config: customLBConfig{}, }, @@ -516,7 +516,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: "ring_hash_experimental", Config: &ringhash.LBConfig{ MinRingSize: 20, @@ -562,7 +562,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { wantUpdate: xdsresource.ClusterUpdate{ ClusterName: clusterName, EDSServiceName: serviceName, }, - wantLBConfig: &internalserviceconfig.BalancerConfig{ + wantLBConfig: &iserviceconfig.BalancerConfig{ Name: "ring_hash_experimental", Config: &ringhash.LBConfig{ MinRingSize: 10, @@ -592,7 +592,7 @@ func (s) TestValidateCluster_Success(t *testing.T) { if diff := cmp.Diff(update, test.wantUpdate, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "LBPolicy")); diff != "" { t.Errorf("validateClusterAndConstructClusterUpdate(%+v) got diff: %v (-got, +want)", test.cluster, diff) } - bc := &internalserviceconfig.BalancerConfig{} + bc := &iserviceconfig.BalancerConfig{} if err := json.Unmarshal(update.LBPolicy, bc); err != nil { t.Fatalf("failed to unmarshal JSON: %v", err) } diff --git a/xds/internal/xdsclient/xdsresource/type_cds.go b/xds/internal/xdsclient/xdsresource/type_cds.go index 8ea9608dc9b7..269d9ebdae15 100644 --- a/xds/internal/xdsclient/xdsresource/type_cds.go +++ b/xds/internal/xdsclient/xdsresource/type_cds.go @@ -19,7 +19,6 @@ package xdsresource import ( "encoding/json" - "time" "google.golang.org/protobuf/types/known/anypb" ) @@ -52,71 +51,6 @@ const ( ClusterLRSServerSelf ) -// OutlierDetection is the outlier detection configuration for a cluster. -type OutlierDetection struct { - // Interval is the time interval between ejection analysis sweeps. This can - // result in both new ejections as well as addresses being returned to - // service. Defaults to 10s. - Interval time.Duration - // BaseEjectionTime is the base time that a host is ejected for. The real - // time is equal to the base time multiplied by the number of times the host - // has been ejected and is capped by MaxEjectionTime. Defaults to 30s. - BaseEjectionTime time.Duration - // MaxEjectionTime is the maximum time that an address is ejected for. If - // not specified, the default value (300s) or the BaseEjectionTime value is - // applied, whichever is larger. - MaxEjectionTime time.Duration - // MaxEjectionPercent is the maximum % of an upstream cluster that can be - // ejected due to outlier detection. Defaults to 10% but will eject at least - // one host regardless of the value. - MaxEjectionPercent uint32 - // SuccessRateStdevFactor is used to determine the ejection threshold for - // success rate outlier ejection. The ejection threshold is the difference - // between the mean success rate, and the product of this factor and the - // standard deviation of the mean success rate: mean - (stdev * - // success_rate_stdev_factor). This factor is divided by a thousand to get a - // double. That is, if the desired factor is 1.9, the runtime value should - // be 1900. Defaults to 1900. - SuccessRateStdevFactor uint32 - // EnforcingSuccessRate is the % chance that a host will be actually ejected - // when an outlier status is detected through success rate statistics. This - // setting can be used to disable ejection or to ramp it up slowly. Defaults - // to 100. - EnforcingSuccessRate uint32 - // SuccessRateMinimumHosts is the number of hosts in a cluster that must - // have enough request volume to detect success rate outliers. If the number - // of hosts is less than this setting, outlier detection via success rate - // statistics is not performed for any host in the cluster. Defaults to 5. - SuccessRateMinimumHosts uint32 - // SuccessRateRequestVolume is the minimum number of total requests that - // must be collected in one interval (as defined by the interval duration - // above) to include this host in success rate based outlier detection. If - // the volume is lower than this setting, outlier detection via success rate - // statistics is not performed for that host. Defaults to 100. - SuccessRateRequestVolume uint32 - // FailurePercentageThreshold is the failure percentage to use when - // determining failure percentage-based outlier detection. If the failure - // percentage of a given host is greater than or equal to this value, it - // will be ejected. Defaults to 85. - FailurePercentageThreshold uint32 - // EnforcingFailurePercentage is the % chance that a host will be actually - // ejected when an outlier status is detected through failure percentage - // statistics. This setting can be used to disable ejection or to ramp it up - // slowly. Defaults to 0. - EnforcingFailurePercentage uint32 - // FailurePercentageMinimumHosts is the minimum number of hosts in a cluster - // in order to perform failure percentage-based ejection. If the total - // number of hosts in the cluster is less than this value, failure - // percentage-based ejection will not be performed. Defaults to 5. - FailurePercentageMinimumHosts uint32 - // FailurePercentageRequestVolume is the minimum number of total requests - // that must be collected in one interval (as defined by the interval - // duration above) to perform failure percentage-based ejection for this - // host. If the volume is lower than this setting, failure percentage-based - // ejection will not be performed for this host. Defaults to 50. - FailurePercentageRequestVolume uint32 -} - // ClusterUpdate contains information from a received CDS response, which is of // interest to the registered CDS watcher. type ClusterUpdate struct { @@ -147,7 +81,7 @@ type ClusterUpdate struct { // OutlierDetection is the outlier detection configuration for this cluster. // If nil, it means this cluster does not use the outlier detection feature. - OutlierDetection *OutlierDetection + OutlierDetection json.RawMessage // Raw is the resource from the xds response. Raw *anypb.Any diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go index d07ad2ea1aee..99768f25e305 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go @@ -33,7 +33,7 @@ import ( "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/pretty" - internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/xds/matcher" "google.golang.org/grpc/xds/internal/xdsclient/xdslbregistry" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" @@ -118,7 +118,7 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu // Process outlier detection received from the control plane iff the // corresponding environment variable is set. - var od *OutlierDetection + var od json.RawMessage if envconfig.XDSOutlierDetection { var err error if od, err = outlierConfigFromCluster(cluster); err != nil { @@ -134,7 +134,7 @@ func validateClusterAndConstructClusterUpdate(cluster *v3clusterpb.Cluster) (Clu // "It will be the responsibility of the XdsClient to validate the // converted configuration. It will do this by having the gRPC LB policy // registry parse the configuration." - A52 - bc := &internalserviceconfig.BalancerConfig{} + bc := &iserviceconfig.BalancerConfig{} if err := json.Unmarshal(lbPolicy, bc); err != nil { return ClusterUpdate{}, fmt.Errorf("JSON generated from xDS LB policy registry: %s is invalid: %v", pretty.FormatJSON(lbPolicy), err) } @@ -490,59 +490,87 @@ func circuitBreakersFromCluster(cluster *v3clusterpb.Cluster) *uint32 { return nil } -// outlierConfigFromCluster extracts the relevant outlier detection -// configuration from the received cluster resource. Returns nil if no -// OutlierDetection field set in the cluster resource. -func outlierConfigFromCluster(cluster *v3clusterpb.Cluster) (*OutlierDetection, error) { +// idurationp takes a time.Duration and converts it to an internal duration, and +// returns a pointer to that internal duration. +func idurationp(d time.Duration) *iserviceconfig.Duration { + id := iserviceconfig.Duration(d) + return &id +} + +func uint32p(i uint32) *uint32 { + return &i +} + +// Helper types to prepare Outlier Detection JSON. Pointer types to distinguish +// between unset and a zero value. +type successRateEjection struct { + StdevFactor *uint32 `json:"stdevFactor,omitempty"` + EnforcementPercentage *uint32 `json:"enforcementPercentage,omitempty"` + MinimumHosts *uint32 `json:"minimumHosts,omitempty"` + RequestVolume *uint32 `json:"requestVolume,omitempty"` +} + +type failurePercentageEjection struct { + Threshold *uint32 `json:"threshold,omitempty"` + EnforcementPercentage *uint32 `json:"enforcementPercentage,omitempty"` + MinimumHosts *uint32 `json:"minimumHosts,omitempty"` + RequestVolume *uint32 `json:"requestVolume,omitempty"` +} + +type odLBConfig struct { + Interval *iserviceconfig.Duration `json:"interval,omitempty"` + BaseEjectionTime *iserviceconfig.Duration `json:"baseEjectionTime,omitempty"` + MaxEjectionTime *iserviceconfig.Duration `json:"maxEjectionTime,omitempty"` + MaxEjectionPercent *uint32 `json:"maxEjectionPercent,omitempty"` + SuccessRateEjection *successRateEjection `json:"successRateEjection,omitempty"` + FailurePercentageEjection *failurePercentageEjection `json:"failurePercentageEjection,omitempty"` +} + +// outlierConfigFromCluster converts the received Outlier Detection +// configuration into JSON configuration for Outlier Detection, taking into +// account xDS Defaults. Returns nil if no OutlierDetection field set in the +// cluster resource. +func outlierConfigFromCluster(cluster *v3clusterpb.Cluster) (json.RawMessage, error) { od := cluster.GetOutlierDetection() if od == nil { return nil, nil } - const ( - defaultInterval = 10 * time.Second - defaultBaseEjectionTime = 30 * time.Second - defaultMaxEjectionTime = 300 * time.Second - defaultMaxEjectionPercent = 10 - defaultSuccessRateStdevFactor = 1900 - defaultEnforcingSuccessRate = 100 - defaultSuccessRateMinimumHosts = 5 - defaultSuccessRateRequestVolume = 100 - defaultFailurePercentageThreshold = 85 - defaultEnforcingFailurePercentage = 0 - defaultFailurePercentageMinimumHosts = 5 - defaultFailurePercentageRequestVolume = 50 - ) + + // "The outlier_detection field of the Cluster resource should have its fields + // validated according to the rules for the corresponding LB policy config + // fields in the above "Validation" section. If any of these requirements is + // violated, the Cluster resource should be NACKed." - A50 // "The google.protobuf.Duration fields interval, base_ejection_time, and // max_ejection_time must obey the restrictions in the // google.protobuf.Duration documentation and they must have non-negative // values." - A50 - interval := defaultInterval + var interval *iserviceconfig.Duration if i := od.GetInterval(); i != nil { if err := i.CheckValid(); err != nil { return nil, fmt.Errorf("outlier_detection.interval is invalid with error: %v", err) } - if interval = i.AsDuration(); interval < 0 { - return nil, fmt.Errorf("outlier_detection.interval = %v; must be a valid duration and >= 0", interval) + if interval = idurationp(i.AsDuration()); *interval < 0 { + return nil, fmt.Errorf("outlier_detection.interval = %v; must be a valid duration and >= 0", *interval) } } - baseEjectionTime := defaultBaseEjectionTime + var baseEjectionTime *iserviceconfig.Duration if bet := od.GetBaseEjectionTime(); bet != nil { if err := bet.CheckValid(); err != nil { return nil, fmt.Errorf("outlier_detection.base_ejection_time is invalid with error: %v", err) } - if baseEjectionTime = bet.AsDuration(); baseEjectionTime < 0 { - return nil, fmt.Errorf("outlier_detection.base_ejection_time = %v; must be >= 0", baseEjectionTime) + if baseEjectionTime = idurationp(bet.AsDuration()); *baseEjectionTime < 0 { + return nil, fmt.Errorf("outlier_detection.base_ejection_time = %v; must be >= 0", *baseEjectionTime) } } - maxEjectionTime := defaultMaxEjectionTime + var maxEjectionTime *iserviceconfig.Duration if met := od.GetMaxEjectionTime(); met != nil { if err := met.CheckValid(); err != nil { return nil, fmt.Errorf("outlier_detection.max_ejection_time is invalid: %v", err) } - if maxEjectionTime = met.AsDuration(); maxEjectionTime < 0 { - return nil, fmt.Errorf("outlier_detection.max_ejection_time = %v; must be >= 0", maxEjectionTime) + if maxEjectionTime = idurationp(met.AsDuration()); *maxEjectionTime < 0 { + return nil, fmt.Errorf("outlier_detection.max_ejection_time = %v; must be >= 0", *maxEjectionTime) } } @@ -550,64 +578,95 @@ func outlierConfigFromCluster(cluster *v3clusterpb.Cluster) (*OutlierDetection, // failure_percentage_threshold, and enforcing_failure_percentage must have // values less than or equal to 100. If any of these requirements is // violated, the Cluster resource should be NACKed." - A50 - maxEjectionPercent := uint32(defaultMaxEjectionPercent) + var maxEjectionPercent *uint32 if mep := od.GetMaxEjectionPercent(); mep != nil { - if maxEjectionPercent = mep.GetValue(); maxEjectionPercent > 100 { - return nil, fmt.Errorf("outlier_detection.max_ejection_percent = %v; must be <= 100", maxEjectionPercent) + if maxEjectionPercent = uint32p(mep.GetValue()); *maxEjectionPercent > 100 { + return nil, fmt.Errorf("outlier_detection.max_ejection_percent = %v; must be <= 100", *maxEjectionPercent) } } - enforcingSuccessRate := uint32(defaultEnforcingSuccessRate) + // "if the enforcing_success_rate field is set to 0, the config + // success_rate_ejection field will be null and all success_rate_* fields + // will be ignored." - A50 + var enforcingSuccessRate *uint32 if esr := od.GetEnforcingSuccessRate(); esr != nil { - if enforcingSuccessRate = esr.GetValue(); enforcingSuccessRate > 100 { - return nil, fmt.Errorf("outlier_detection.enforcing_success_rate = %v; must be <= 100", enforcingSuccessRate) + if enforcingSuccessRate = uint32p(esr.GetValue()); *enforcingSuccessRate > 100 { + return nil, fmt.Errorf("outlier_detection.enforcing_success_rate = %v; must be <= 100", *enforcingSuccessRate) } } - failurePercentageThreshold := uint32(defaultFailurePercentageThreshold) + var failurePercentageThreshold *uint32 if fpt := od.GetFailurePercentageThreshold(); fpt != nil { - if failurePercentageThreshold = fpt.GetValue(); failurePercentageThreshold > 100 { - return nil, fmt.Errorf("outlier_detection.failure_percentage_threshold = %v; must be <= 100", failurePercentageThreshold) + if failurePercentageThreshold = uint32p(fpt.GetValue()); *failurePercentageThreshold > 100 { + return nil, fmt.Errorf("outlier_detection.failure_percentage_threshold = %v; must be <= 100", *failurePercentageThreshold) } } - enforcingFailurePercentage := uint32(defaultEnforcingFailurePercentage) + // "If the enforcing_failure_percent field is set to 0 or null, the config + // failure_percent_ejection field will be null and all failure_percent_* + // fields will be ignored." - A50 + var enforcingFailurePercentage *uint32 if efp := od.GetEnforcingFailurePercentage(); efp != nil { - if enforcingFailurePercentage = efp.GetValue(); enforcingFailurePercentage > 100 { - return nil, fmt.Errorf("outlier_detection.enforcing_failure_percentage = %v; must be <= 100", enforcingFailurePercentage) + if enforcingFailurePercentage = uint32p(efp.GetValue()); *enforcingFailurePercentage > 100 { + return nil, fmt.Errorf("outlier_detection.enforcing_failure_percentage = %v; must be <= 100", *enforcingFailurePercentage) } } - successRateStdevFactor := uint32(defaultSuccessRateStdevFactor) + var successRateStdevFactor *uint32 if srsf := od.GetSuccessRateStdevFactor(); srsf != nil { - successRateStdevFactor = srsf.GetValue() + successRateStdevFactor = uint32p(srsf.GetValue()) } - successRateMinimumHosts := uint32(defaultSuccessRateMinimumHosts) + var successRateMinimumHosts *uint32 if srmh := od.GetSuccessRateMinimumHosts(); srmh != nil { - successRateMinimumHosts = srmh.GetValue() + successRateMinimumHosts = uint32p(srmh.GetValue()) } - successRateRequestVolume := uint32(defaultSuccessRateRequestVolume) + var successRateRequestVolume *uint32 if srrv := od.GetSuccessRateRequestVolume(); srrv != nil { - successRateRequestVolume = srrv.GetValue() + successRateRequestVolume = uint32p(srrv.GetValue()) } - failurePercentageMinimumHosts := uint32(defaultFailurePercentageMinimumHosts) + var failurePercentageMinimumHosts *uint32 if fpmh := od.GetFailurePercentageMinimumHosts(); fpmh != nil { - failurePercentageMinimumHosts = fpmh.GetValue() + failurePercentageMinimumHosts = uint32p(fpmh.GetValue()) } - failurePercentageRequestVolume := uint32(defaultFailurePercentageRequestVolume) + var failurePercentageRequestVolume *uint32 if fprv := od.GetFailurePercentageRequestVolume(); fprv != nil { - failurePercentageRequestVolume = fprv.GetValue() - } - - return &OutlierDetection{ - Interval: interval, - BaseEjectionTime: baseEjectionTime, - MaxEjectionTime: maxEjectionTime, - MaxEjectionPercent: maxEjectionPercent, - EnforcingSuccessRate: enforcingSuccessRate, - FailurePercentageThreshold: failurePercentageThreshold, - EnforcingFailurePercentage: enforcingFailurePercentage, - SuccessRateStdevFactor: successRateStdevFactor, - SuccessRateMinimumHosts: successRateMinimumHosts, - SuccessRateRequestVolume: successRateRequestVolume, - FailurePercentageMinimumHosts: failurePercentageMinimumHosts, - FailurePercentageRequestVolume: failurePercentageRequestVolume, - }, nil + failurePercentageRequestVolume = uint32p(fprv.GetValue()) + } + + // "if the enforcing_success_rate field is set to 0, the config + // success_rate_ejection field will be null and all success_rate_* fields + // will be ignored." - A50 + var sre *successRateEjection + if enforcingSuccessRate == nil || *enforcingSuccessRate != 0 { + sre = &successRateEjection{ + StdevFactor: successRateStdevFactor, + EnforcementPercentage: enforcingSuccessRate, + MinimumHosts: successRateMinimumHosts, + RequestVolume: successRateRequestVolume, + } + } + + // "If the enforcing_failure_percent field is set to 0 or null, the config + // failure_percent_ejection field will be null and all failure_percent_* + // fields will be ignored." - A50 + var fpe *failurePercentageEjection + if enforcingFailurePercentage != nil && *enforcingFailurePercentage != 0 { + fpe = &failurePercentageEjection{ + Threshold: failurePercentageThreshold, + EnforcementPercentage: enforcingFailurePercentage, + MinimumHosts: failurePercentageMinimumHosts, + RequestVolume: failurePercentageRequestVolume, + } + } + + odLBCfg := &odLBConfig{ + Interval: interval, + BaseEjectionTime: baseEjectionTime, + MaxEjectionTime: maxEjectionTime, + MaxEjectionPercent: maxEjectionPercent, + SuccessRateEjection: sre, + FailurePercentageEjection: fpe, + } + odLBCfgJSON, err := json.Marshal(odLBCfg) + if err != nil { + return nil, err + } + return odLBCfgJSON, nil } diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go index 0c69d27ad42d..e057b951326d 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds_test.go @@ -18,10 +18,10 @@ package xdsresource import ( + "encoding/json" "regexp" "strings" "testing" - "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -30,8 +30,6 @@ import ( "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/xds/matcher" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - "google.golang.org/protobuf/types/known/durationpb" - "google.golang.org/protobuf/types/known/wrapperspb" v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" @@ -43,6 +41,8 @@ import ( v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3" anypb "github.com/golang/protobuf/ptypes/any" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/wrapperspb" ) const ( @@ -1382,43 +1382,55 @@ func (s) TestValidateClusterWithOutlierDetection(t *testing.T) { OutlierDetection: od, } } - odToClusterUpdate := func(od *OutlierDetection) ClusterUpdate { - return ClusterUpdate{ - ClusterName: clusterName, - LRSServerConfig: ClusterLRSOff, - OutlierDetection: od, - } - } tests := []struct { - name string - cluster *v3clusterpb.Cluster - wantUpdate ClusterUpdate - wantErr bool + name string + cluster *v3clusterpb.Cluster + wantODCfg string + wantErr bool }{ { - name: "successful-case-all-defaults", - // Outlier detection proto is present without any fields specified, - // so should trigger all default values in the update. - cluster: odToClusterProto(&v3clusterpb.OutlierDetection{}), - wantUpdate: odToClusterUpdate(&OutlierDetection{ - Interval: 10 * time.Second, - BaseEjectionTime: 30 * time.Second, - MaxEjectionTime: 300 * time.Second, - MaxEjectionPercent: 10, - SuccessRateStdevFactor: 1900, - EnforcingSuccessRate: 100, - SuccessRateMinimumHosts: 5, - SuccessRateRequestVolume: 100, - FailurePercentageThreshold: 85, - EnforcingFailurePercentage: 0, - FailurePercentageMinimumHosts: 5, - FailurePercentageRequestVolume: 50, + name: "success-and-failure-null", + cluster: odToClusterProto(&v3clusterpb.OutlierDetection{}), + wantODCfg: `{"successRateEjection": {}}`, + }, + { + name: "success-and-failure-zero", + cluster: odToClusterProto(&v3clusterpb.OutlierDetection{ + EnforcingSuccessRate: &wrapperspb.UInt32Value{Value: 0}, // Thus doesn't create sre - to focus on fpe + EnforcingFailurePercentage: &wrapperspb.UInt32Value{Value: 0}, }), + wantODCfg: `{}`, }, { - name: "successful-case-all-fields-configured-and-valid", + name: "some-fields-set", cluster: odToClusterProto(&v3clusterpb.OutlierDetection{ + Interval: &durationpb.Duration{Seconds: 1}, + MaxEjectionTime: &durationpb.Duration{Seconds: 3}, + EnforcingSuccessRate: &wrapperspb.UInt32Value{Value: 3}, + SuccessRateRequestVolume: &wrapperspb.UInt32Value{Value: 5}, + EnforcingFailurePercentage: &wrapperspb.UInt32Value{Value: 7}, + FailurePercentageRequestVolume: &wrapperspb.UInt32Value{Value: 9}, + }), + wantODCfg: `{ + "interval": "1s", + "maxEjectionTime": "3s", + "successRateEjection": { + "enforcementPercentage": 3, + "requestVolume": 5 + }, + "failurePercentageEjection": { + "enforcementPercentage": 7, + "requestVolume": 9 + } + }`, + }, + { + name: "every-field-set-non-zero", + cluster: odToClusterProto(&v3clusterpb.OutlierDetection{ + // all fields set (including ones that will be layered) should + // pick up those too and explicitly all fields, including those + // put in layers, in the JSON generated. Interval: &durationpb.Duration{Seconds: 1}, BaseEjectionTime: &durationpb.Duration{Seconds: 2}, MaxEjectionTime: &durationpb.Duration{Seconds: 3}, @@ -1432,20 +1444,24 @@ func (s) TestValidateClusterWithOutlierDetection(t *testing.T) { FailurePercentageMinimumHosts: &wrapperspb.UInt32Value{Value: 8}, FailurePercentageRequestVolume: &wrapperspb.UInt32Value{Value: 9}, }), - wantUpdate: odToClusterUpdate(&OutlierDetection{ - Interval: time.Second, - BaseEjectionTime: time.Second * 2, - MaxEjectionTime: time.Second * 3, - MaxEjectionPercent: 1, - SuccessRateStdevFactor: 2, - EnforcingSuccessRate: 3, - SuccessRateMinimumHosts: 4, - SuccessRateRequestVolume: 5, - FailurePercentageThreshold: 6, - EnforcingFailurePercentage: 7, - FailurePercentageMinimumHosts: 8, - FailurePercentageRequestVolume: 9, - }), + wantODCfg: `{ + "interval": "1s", + "baseEjectionTime": "2s", + "maxEjectionTime": "3s", + "maxEjectionPercent": 1, + "successRateEjection": { + "stdevFactor": 2, + "enforcementPercentage": 3, + "minimumHosts": 4, + "requestVolume": 5 + }, + "failurePercentageEjection": { + "threshold": 6, + "enforcementPercentage": 7, + "minimumHosts": 8, + "requestVolume": 9 + } + }`, }, { name: "interval-is-negative", @@ -1507,8 +1523,21 @@ func (s) TestValidateClusterWithOutlierDetection(t *testing.T) { if (err != nil) != test.wantErr { t.Errorf("validateClusterAndConstructClusterUpdate() returned err %v wantErr %v)", err, test.wantErr) } - if diff := cmp.Diff(test.wantUpdate, update, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(ClusterUpdate{}, "LBPolicy")); diff != "" { - t.Errorf("validateClusterAndConstructClusterUpdate() returned unexpected diff (-want, +got):\n%s", diff) + if test.wantErr { + return + } + // got and want must be unmarshalled since JSON strings shouldn't + // generally be directly compared. + var got map[string]interface{} + if err := json.Unmarshal(update.OutlierDetection, &got); err != nil { + t.Fatalf("Error unmarshalling update.OutlierDetection (%q): %v", update.OutlierDetection, err) + } + var want map[string]interface{} + if err := json.Unmarshal(json.RawMessage(test.wantODCfg), &want); err != nil { + t.Fatalf("Error unmarshalling wantODCfg (%q): %v", test.wantODCfg, err) + } + if diff := cmp.Diff(got, want); diff != "" { + t.Fatalf("cluster.OutlierDetection got unexpected output, diff (-got, +want): %v", diff) } }) } From a2552fe2cd15ac00136ced9d22c1564f603db96e Mon Sep 17 00:00:00 2001 From: Zach Reyes Date: Fri, 9 Jun 2023 14:38:27 -0400 Subject: [PATCH 2/6] Responded to Doug's comments --- internal/balancer/nop/nop.go | 60 +++++++++++++++++++ test/xds/xds_client_outlier_detection_test.go | 2 +- .../balancer/cdsbalancer/cdsbalancer.go | 5 +- .../clusterresolver/clusterresolver.go | 33 +++++----- .../balancer/outlierdetection/config.go | 27 ++++++++- .../xdsclient/xdsresource/unmarshal_cds.go | 6 +- 6 files changed, 105 insertions(+), 28 deletions(-) create mode 100644 internal/balancer/nop/nop.go diff --git a/internal/balancer/nop/nop.go b/internal/balancer/nop/nop.go new file mode 100644 index 000000000000..52c1ea039e50 --- /dev/null +++ b/internal/balancer/nop/nop.go @@ -0,0 +1,60 @@ +/* + * + * Copyright 2023 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// Package nop implements a balancer with all of it's balancer operations as +// no-ops, other than returning a Transient Failure Picker on a Client Conn +// update. +package nop + +import ( + "errors" + + "google.golang.org/grpc/balancer" + "google.golang.org/grpc/balancer/base" + "google.golang.org/grpc/connectivity" +) + +// Balancer is a balancer with all of it's balancer operations as no-ops, other +// than returning a Transient Failure Picker on a Client Conn update. +type Balancer struct { + cc balancer.ClientConn +} + +// NewNOPBalancer returns a no-op balancer. +func NewNOPBalancer(cc balancer.ClientConn) *Balancer { + return &Balancer{cc: cc} +} + +// UpdateClientConnState updates the Balancer's Client Conn with an Error Picker +// and a Connectivity State of TRANSIENT_FAILURE. +func (b *Balancer) UpdateClientConnState(_ balancer.ClientConnState) error { + b.cc.UpdateState(balancer.State{ + Picker: base.NewErrPicker(errors.New("no-op balancer invoked")), + ConnectivityState: connectivity.TransientFailure, + }) + return nil +} + +// ResolverError is a no-op. +func (b *Balancer) ResolverError(_ error) {} + +// UpdateSubConnState is a no-op. +func (b *Balancer) UpdateSubConnState(_ balancer.SubConn, _ balancer.SubConnState) {} + +// Close is a no-op. +func (b *Balancer) Close() {} diff --git a/test/xds/xds_client_outlier_detection_test.go b/test/xds/xds_client_outlier_detection_test.go index 67271d3b50c5..d91b35a883aa 100644 --- a/test/xds/xds_client_outlier_detection_test.go +++ b/test/xds/xds_client_outlier_detection_test.go @@ -267,7 +267,7 @@ func (s) TestOutlierDetectionXDSDefaultOn(t *testing.T) { // Configure CDS resources with Outlier Detection set but // EnforcingSuccessRate unset. This should cause Outlier Detection to be // configured with SuccessRateEjection present in configuration, which will - // eventually be populated with it's default values along with the knobs set + // eventually be populated with its default values along with the knobs set // as SuccessRate fields in the proto, and thus Outlier Detection should be // on and actively eject upstreams. const serviceName = "my-service-client-side-xds" diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 1ffb26b19a8b..7fc408b11383 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -27,6 +27,7 @@ import ( "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/tls/certprovider" + "google.golang.org/grpc/internal/balancer/nop" "google.golang.org/grpc/internal/buffer" xdsinternal "google.golang.org/grpc/internal/credentials/xds" "google.golang.org/grpc/internal/envconfig" @@ -78,13 +79,13 @@ func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Bal // Shouldn't happen, registered through imported Cluster Resolver, // defensive programming. logger.Errorf("%q LB policy is needed but not registered", clusterresolver.Name) - return nil + return nop.NewNOPBalancer(cc) } crParser, ok := builder.(balancer.ConfigParser) if !ok { // Shouldn't happen, imported Cluster Resolver builder has this method. logger.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name) - return nil + return nop.NewNOPBalancer(cc) } b := &cdsBalancer{ bOpts: opts, diff --git a/xds/internal/balancer/clusterresolver/clusterresolver.go b/xds/internal/balancer/clusterresolver/clusterresolver.go index f79f06d7b07c..b028dd9d993c 100644 --- a/xds/internal/balancer/clusterresolver/clusterresolver.go +++ b/xds/internal/balancer/clusterresolver/clusterresolver.go @@ -30,6 +30,7 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/base" "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/internal/balancer/nop" "google.golang.org/grpc/internal/buffer" "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpclog" @@ -64,12 +65,12 @@ func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Bal priorityBuilder := balancer.Get(priority.Name) if priorityBuilder == nil { logger.Errorf("%q LB policy is needed but not registered", priority.Name) - return nil + return nop.NewNOPBalancer(cc) } priorityConfigParser, ok := priorityBuilder.(balancer.ConfigParser) if !ok { logger.Errorf("%q LB policy does not implement a config parser", priority.Name) - return nil + return nop.NewNOPBalancer(cc) } b := &clusterResolverBalancer{ @@ -116,23 +117,19 @@ func (bb) ParseConfig(j json.RawMessage) (serviceconfig.LoadBalancingConfig, err return nil, fmt.Errorf("unable to unmarshal balancer config %s into cluster-resolver config, error: %v", string(j), err) } - odCfgs := make([]outlierdetection.LBConfig, len(cfg.DiscoveryMechanisms)) - for i, dm := range cfg.DiscoveryMechanisms { - lbCfg, err := odParser.ParseConfig(dm.OutlierDetection) - if err != nil { - return nil, fmt.Errorf("error parsing Outlier Detection config: %v", dm.OutlierDetection) - } - odCfg, ok := lbCfg.(*outlierdetection.LBConfig) - if !ok { - // Shouldn't happen, Parser built at build time with Outlier Detection - // builder pulled from gRPC LB Registry. - return nil, fmt.Errorf("odParser returned config with unexpected type %T: %v", lbCfg, lbCfg) - } - odCfgs[i] = *odCfg - } if envconfig.XDSOutlierDetection { - for i, odCfg := range odCfgs { - cfg.DiscoveryMechanisms[i].outlierDetection = odCfg + for i, dm := range cfg.DiscoveryMechanisms { + lbCfg, err := odParser.ParseConfig(dm.OutlierDetection) + if err != nil { + return nil, fmt.Errorf("error parsing Outlier Detection config %v: %v", dm.OutlierDetection, err) + } + odCfg, ok := lbCfg.(*outlierdetection.LBConfig) + if !ok { + // Shouldn't happen, Parser built at build time with Outlier Detection + // builder pulled from gRPC LB Registry. + return nil, fmt.Errorf("odParser returned config with unexpected type %T: %v", lbCfg, lbCfg) + } + cfg.DiscoveryMechanisms[i].outlierDetection = *odCfg } } if err := json.Unmarshal(cfg.XDSLBPolicy, &cfg.xdsLBPolicy); err != nil { diff --git a/xds/internal/balancer/outlierdetection/config.go b/xds/internal/balancer/outlierdetection/config.go index 2df0aa231e65..196a562ed69d 100644 --- a/xds/internal/balancer/outlierdetection/config.go +++ b/xds/internal/balancer/outlierdetection/config.go @@ -19,6 +19,7 @@ package outlierdetection import ( "encoding/json" + "time" iserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/serviceconfig" @@ -59,7 +60,7 @@ type SuccessRateEjection struct { type successRateEjection SuccessRateEjection // UnmarshalJSON unmarshals JSON into SuccessRateEjection. If a -// SuccessRateEjection field is not set, that field will get it's default value. +// SuccessRateEjection field is not set, that field will get its default value. func (sre *SuccessRateEjection) UnmarshalJSON(j []byte) error { sre.StdevFactor = 1900 sre.EnforcementPercentage = 100 @@ -124,7 +125,7 @@ type FailurePercentageEjection struct { type failurePercentageEjection FailurePercentageEjection // UnmarshalJSON unmarshals JSON into FailurePercentageEjection. If a -// FailurePercentageEjection field is not set, that field will get it's default +// FailurePercentageEjection field is not set, that field will get its default // value. func (fpe *FailurePercentageEjection) UnmarshalJSON(j []byte) error { fpe.Threshold = 85 @@ -188,6 +189,28 @@ type LBConfig struct { ChildPolicy *iserviceconfig.BalancerConfig `json:"childPolicy,omitempty"` } +// For UnmarshalJSON to work correctly and set defaults without infinite +// recursion. +type lbConfig LBConfig + +// UnmarshalJSON unmarshals JSON into LBConfig. If a top level LBConfig field +// (i.e. not next layer sre or fpe) is not set, that field will get its default +// value. If sre or fpe is not set, it will stay unset, otherwise it will +// unmarshal on those types populating with default values for their fields if +// needed. +func (lbc *LBConfig) UnmarshalJSON(j []byte) error { + // Default top layer values as documented in A50. + lbc.Interval = iserviceconfig.Duration(10 * time.Second) + lbc.BaseEjectionTime = iserviceconfig.Duration(30 * time.Second) + lbc.MaxEjectionTime = iserviceconfig.Duration(300 * time.Second) + lbc.MaxEjectionPercent = 10 + // Unmarshal JSON on a type with zero values for methods, including + // UnmarshalJSON. Overwrites defaults, leaves alone if not. typecast to + // avoid infinite recursion by not recalling this function and causing stack + // overflow. + return json.Unmarshal(j, (*lbConfig)(lbc)) +} + // EqualIgnoringChildPolicy returns whether the LBConfig is same with the // parameter outside of the child policy, only comparing the Outlier Detection // specific configuration. diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go index 99768f25e305..9f8530111a73 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_cds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_cds.go @@ -664,9 +664,5 @@ func outlierConfigFromCluster(cluster *v3clusterpb.Cluster) (json.RawMessage, er SuccessRateEjection: sre, FailurePercentageEjection: fpe, } - odLBCfgJSON, err := json.Marshal(odLBCfg) - if err != nil { - return nil, err - } - return odLBCfgJSON, nil + return json.Marshal(odLBCfg) } From 183d67f79be1f464f73fbf4defae4700bb017f9b Mon Sep 17 00:00:00 2001 From: Zach Reyes Date: Fri, 9 Jun 2023 16:21:01 -0400 Subject: [PATCH 3/6] Responded to Doug's comments --- internal/balancer/nop/nop.go | 20 ++++++++++--------- .../balancer/cdsbalancer/cdsbalancer.go | 4 ++-- .../clusterresolver/clusterresolver.go | 4 ++-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/internal/balancer/nop/nop.go b/internal/balancer/nop/nop.go index 52c1ea039e50..18f9b2fe8520 100644 --- a/internal/balancer/nop/nop.go +++ b/internal/balancer/nop/nop.go @@ -16,35 +16,37 @@ * */ -// Package nop implements a balancer with all of it's balancer operations as +// Package nop implements a balancer with all of its balancer operations as // no-ops, other than returning a Transient Failure Picker on a Client Conn // update. package nop import ( - "errors" - "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/base" "google.golang.org/grpc/connectivity" ) -// Balancer is a balancer with all of it's balancer operations as no-ops, other +// Balancer is a balancer with all of its balancer operations as no-ops, other // than returning a Transient Failure Picker on a Client Conn update. type Balancer struct { - cc balancer.ClientConn + cc balancer.ClientConn + err error } -// NewNOPBalancer returns a no-op balancer. -func NewNOPBalancer(cc balancer.ClientConn) *Balancer { - return &Balancer{cc: cc} +// NewBalancer returns a no-op balancer. +func NewBalancer(cc balancer.ClientConn, err error) *Balancer { + return &Balancer{ + cc: cc, + err: err, + } } // UpdateClientConnState updates the Balancer's Client Conn with an Error Picker // and a Connectivity State of TRANSIENT_FAILURE. func (b *Balancer) UpdateClientConnState(_ balancer.ClientConnState) error { b.cc.UpdateState(balancer.State{ - Picker: base.NewErrPicker(errors.New("no-op balancer invoked")), + Picker: base.NewErrPicker(b.err), ConnectivityState: connectivity.TransientFailure, }) return nil diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 7fc408b11383..bcdeaf681ab5 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -79,13 +79,13 @@ func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Bal // Shouldn't happen, registered through imported Cluster Resolver, // defensive programming. logger.Errorf("%q LB policy is needed but not registered", clusterresolver.Name) - return nop.NewNOPBalancer(cc) + return nop.NewBalancer(cc, fmt.Errorf("%q LB policy is needed but not registered", clusterresolver.Name)) } crParser, ok := builder.(balancer.ConfigParser) if !ok { // Shouldn't happen, imported Cluster Resolver builder has this method. logger.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name) - return nop.NewNOPBalancer(cc) + return nop.NewBalancer(cc, fmt.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name)) } b := &cdsBalancer{ bOpts: opts, diff --git a/xds/internal/balancer/clusterresolver/clusterresolver.go b/xds/internal/balancer/clusterresolver/clusterresolver.go index b028dd9d993c..5eadd1ac1d0e 100644 --- a/xds/internal/balancer/clusterresolver/clusterresolver.go +++ b/xds/internal/balancer/clusterresolver/clusterresolver.go @@ -65,12 +65,12 @@ func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Bal priorityBuilder := balancer.Get(priority.Name) if priorityBuilder == nil { logger.Errorf("%q LB policy is needed but not registered", priority.Name) - return nop.NewNOPBalancer(cc) + return nop.NewBalancer(cc, fmt.Errorf("%q LB policy is needed but not registered", priority.Name)) } priorityConfigParser, ok := priorityBuilder.(balancer.ConfigParser) if !ok { logger.Errorf("%q LB policy does not implement a config parser", priority.Name) - return nop.NewNOPBalancer(cc) + return nop.NewBalancer(cc, fmt.Errorf("%q LB policy does not implement a config parser", priority.Name)) } b := &clusterResolverBalancer{ From 65f380fb2aa6e21874401ee37c50068137681e01 Mon Sep 17 00:00:00 2001 From: Zach Reyes Date: Fri, 9 Jun 2023 19:09:37 -0400 Subject: [PATCH 4/6] unexport --- internal/balancer/nop/nop.go | 20 +++++++++---------- .../balancer/cdsbalancer/cdsbalancer.go | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/internal/balancer/nop/nop.go b/internal/balancer/nop/nop.go index 18f9b2fe8520..0c96f1b81186 100644 --- a/internal/balancer/nop/nop.go +++ b/internal/balancer/nop/nop.go @@ -27,24 +27,24 @@ import ( "google.golang.org/grpc/connectivity" ) -// Balancer is a balancer with all of its balancer operations as no-ops, other -// than returning a Transient Failure Picker on a Client Conn update. -type Balancer struct { +// bal is a balancer with all of its balancer operations as no-ops, other than +// returning a Transient Failure Picker on a Client Conn update. +type bal struct { cc balancer.ClientConn err error } // NewBalancer returns a no-op balancer. -func NewBalancer(cc balancer.ClientConn, err error) *Balancer { - return &Balancer{ +func NewBalancer(cc balancer.ClientConn, err error) balancer.Balancer { + return &bal{ cc: cc, err: err, } } -// UpdateClientConnState updates the Balancer's Client Conn with an Error Picker +// UpdateClientConnState updates the bal's Client Conn with an Error Picker // and a Connectivity State of TRANSIENT_FAILURE. -func (b *Balancer) UpdateClientConnState(_ balancer.ClientConnState) error { +func (b *bal) UpdateClientConnState(_ balancer.ClientConnState) error { b.cc.UpdateState(balancer.State{ Picker: base.NewErrPicker(b.err), ConnectivityState: connectivity.TransientFailure, @@ -53,10 +53,10 @@ func (b *Balancer) UpdateClientConnState(_ balancer.ClientConnState) error { } // ResolverError is a no-op. -func (b *Balancer) ResolverError(_ error) {} +func (b *bal) ResolverError(_ error) {} // UpdateSubConnState is a no-op. -func (b *Balancer) UpdateSubConnState(_ balancer.SubConn, _ balancer.SubConnState) {} +func (b *bal) UpdateSubConnState(_ balancer.SubConn, _ balancer.SubConnState) {} // Close is a no-op. -func (b *Balancer) Close() {} +func (b *bal) Close() {} diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index bcdeaf681ab5..c87fc67b7cbb 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -160,7 +160,7 @@ type exitIdle struct{} // cdsBalancer implements a CDS based LB policy. It instantiates a // cluster_resolver balancer to further resolve the serviceName received from -// CDS, into localities and endpoints. Implements the balancer.Balancer +// CDS, into localities and endpoints. Implements the balancer.bal // interface which is exposed to gRPC and implements the balancer.ClientConn // interface which is exposed to the cluster_resolver balancer. type cdsBalancer struct { From e5aaa8eb69c9f2d59e5128f1b319738841642029 Mon Sep 17 00:00:00 2001 From: Zach Reyes Date: Fri, 9 Jun 2023 19:12:07 -0400 Subject: [PATCH 5/6] Whoops --- internal/balancer/nop/nop.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/balancer/nop/nop.go b/internal/balancer/nop/nop.go index 0c96f1b81186..c07d5ef493ab 100644 --- a/internal/balancer/nop/nop.go +++ b/internal/balancer/nop/nop.go @@ -42,7 +42,7 @@ func NewBalancer(cc balancer.ClientConn, err error) balancer.Balancer { } } -// UpdateClientConnState updates the bal's Client Conn with an Error Picker +// UpdateClientConnState updates the Balancer's Client Conn with an Error Picker // and a Connectivity State of TRANSIENT_FAILURE. func (b *bal) UpdateClientConnState(_ balancer.ClientConnState) error { b.cc.UpdateState(balancer.State{ From f37646d084b276e13e23ccabdde2db3de8471375 Mon Sep 17 00:00:00 2001 From: Zach Reyes Date: Fri, 9 Jun 2023 19:19:17 -0400 Subject: [PATCH 6/6] whoops part 2, electric bugaloo --- internal/balancer/nop/nop.go | 2 +- xds/internal/balancer/cdsbalancer/cdsbalancer.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/balancer/nop/nop.go b/internal/balancer/nop/nop.go index c07d5ef493ab..0c96f1b81186 100644 --- a/internal/balancer/nop/nop.go +++ b/internal/balancer/nop/nop.go @@ -42,7 +42,7 @@ func NewBalancer(cc balancer.ClientConn, err error) balancer.Balancer { } } -// UpdateClientConnState updates the Balancer's Client Conn with an Error Picker +// UpdateClientConnState updates the bal's Client Conn with an Error Picker // and a Connectivity State of TRANSIENT_FAILURE. func (b *bal) UpdateClientConnState(_ balancer.ClientConnState) error { b.cc.UpdateState(balancer.State{ diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index c87fc67b7cbb..bcdeaf681ab5 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -160,7 +160,7 @@ type exitIdle struct{} // cdsBalancer implements a CDS based LB policy. It instantiates a // cluster_resolver balancer to further resolve the serviceName received from -// CDS, into localities and endpoints. Implements the balancer.bal +// CDS, into localities and endpoints. Implements the balancer.Balancer // interface which is exposed to gRPC and implements the balancer.ClientConn // interface which is exposed to the cluster_resolver balancer. type cdsBalancer struct {