-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
xds/outlierdetection: fix config handling #6361
Changes from 2 commits
e9c6d27
a2552fe
183d67f
65f380f
e5aaa8e
f37646d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* | ||
* Copyright 2023 gRPC authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
// Package nop implements a balancer with all of its balancer operations as | ||
// no-ops, other than returning a Transient Failure Picker on a Client Conn | ||
// update. | ||
package nop | ||
|
||
import ( | ||
"errors" | ||
|
||
"google.golang.org/grpc/balancer" | ||
"google.golang.org/grpc/balancer/base" | ||
"google.golang.org/grpc/connectivity" | ||
) | ||
|
||
// Balancer is a balancer with all of its balancer operations as no-ops, other | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. its There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. :/. Haha switched. |
||
// than returning a Transient Failure Picker on a Client Conn update. | ||
type Balancer struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unexport, then you don't need all these comments. Only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had it unexported, but it was throwing errors wrt lint. I had to change it to pass lint :/ |
||
cc balancer.ClientConn | ||
} | ||
|
||
// NewNOPBalancer returns a no-op balancer. | ||
func NewNOPBalancer(cc balancer.ClientConn) *Balancer { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah ok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
return &Balancer{cc: cc} | ||
} | ||
|
||
// UpdateClientConnState updates the Balancer's Client Conn with an Error Picker | ||
// and a Connectivity State of TRANSIENT_FAILURE. | ||
func (b *Balancer) UpdateClientConnState(_ balancer.ClientConnState) error { | ||
b.cc.UpdateState(balancer.State{ | ||
Picker: base.NewErrPicker(errors.New("no-op balancer invoked")), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The error returned here should be an error passed to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
ConnectivityState: connectivity.TransientFailure, | ||
}) | ||
return nil | ||
} | ||
|
||
// ResolverError is a no-op: the error passed in is discarded and the method | ||
// body is empty. | ||
func (b *Balancer) ResolverError(_ error) {} | ||
|
||
// UpdateSubConnState is a no-op: both the SubConn and its reported state are | ||
// ignored and the method body is empty. | ||
func (b *Balancer) UpdateSubConnState(_ balancer.SubConn, _ balancer.SubConnState) {} | ||
|
||
// Close is a no-op; the method body is empty, so nothing is released here. | ||
func (b *Balancer) Close() {} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,17 +27,16 @@ import ( | |
"google.golang.org/grpc/connectivity" | ||
"google.golang.org/grpc/credentials" | ||
"google.golang.org/grpc/credentials/tls/certprovider" | ||
"google.golang.org/grpc/internal/balancer/nop" | ||
"google.golang.org/grpc/internal/buffer" | ||
xdsinternal "google.golang.org/grpc/internal/credentials/xds" | ||
"google.golang.org/grpc/internal/envconfig" | ||
"google.golang.org/grpc/internal/grpclog" | ||
"google.golang.org/grpc/internal/grpcsync" | ||
"google.golang.org/grpc/internal/pretty" | ||
internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" | ||
"google.golang.org/grpc/resolver" | ||
"google.golang.org/grpc/serviceconfig" | ||
"google.golang.org/grpc/xds/internal/balancer/clusterresolver" | ||
"google.golang.org/grpc/xds/internal/balancer/outlierdetection" | ||
"google.golang.org/grpc/xds/internal/xdsclient" | ||
"google.golang.org/grpc/xds/internal/xdsclient/xdsresource" | ||
) | ||
|
@@ -75,11 +74,25 @@ type bb struct{} | |
|
||
// Build creates a new CDS balancer with the ClientConn. | ||
func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { | ||
builder := balancer.Get(clusterresolver.Name) | ||
if builder == nil { | ||
// Shouldn't happen, registered through imported Cluster Resolver, | ||
// defensive programming. | ||
logger.Errorf("%q LB policy is needed but not registered", clusterresolver.Name) | ||
return nop.NewNOPBalancer(cc) | ||
} | ||
crParser, ok := builder.(balancer.ConfigParser) | ||
if !ok { | ||
// Shouldn't happen, imported Cluster Resolver builder has this method. | ||
logger.Errorf("%q LB policy does not implement a config parser", clusterresolver.Name) | ||
return nop.NewNOPBalancer(cc) | ||
} | ||
b := &cdsBalancer{ | ||
bOpts: opts, | ||
updateCh: buffer.NewUnbounded(), | ||
closed: grpcsync.NewEvent(), | ||
done: grpcsync.NewEvent(), | ||
crParser: crParser, | ||
xdsHI: xdsinternal.NewHandshakeInfo(nil, nil), | ||
} | ||
b.logger = prefixLogger((b)) | ||
|
@@ -160,6 +173,7 @@ type cdsBalancer struct { | |
logger *grpclog.PrefixLogger | ||
closed *grpcsync.Event | ||
done *grpcsync.Event | ||
crParser balancer.ConfigParser | ||
|
||
// The certificate providers are cached here so that they can be closed when | ||
// a new provider is to be created. | ||
|
@@ -271,52 +285,6 @@ func buildProviderFunc(configs map[string]*certprovider.BuildableConfig, instanc | |
return provider, nil | ||
} | ||
|
||
// outlierDetectionToConfig converts the xDS outlier detection settings in od | ||
// into an outlierdetection.LBConfig. | ||
// | ||
// A nil od yields a config with only Interval set (to 1<<63 - 1). Otherwise | ||
// the interval, ejection times and max ejection percent are copied over, and | ||
// the success-rate and failure-percentage ejection sections are populated | ||
// only when their respective enforcing percentage is non-zero; they are left | ||
// nil otherwise. | ||
func outlierDetectionToConfig(od *xdsresource.OutlierDetection) outlierdetection.LBConfig { // Already validated - no need to return error | ||
if od == nil { | ||
// "If the outlier_detection field is not set in the Cluster message, a | ||
// "no-op" outlier_detection config will be generated, with interval set | ||
// to the maximum possible value and all other fields unset." - A50 | ||
return outlierdetection.LBConfig{ | ||
// 1<<63 - 1 is the largest value representable in a signed 64-bit integer. | ||
Interval: 1<<63 - 1, | ||
} | ||
} | ||
|
||
// "if the enforcing_success_rate field is set to 0, the config | ||
// success_rate_ejection field will be null and all success_rate_* fields | ||
// will be ignored." - A50 | ||
var sre *outlierdetection.SuccessRateEjection | ||
if od.EnforcingSuccessRate != 0 { | ||
sre = &outlierdetection.SuccessRateEjection{ | ||
StdevFactor: od.SuccessRateStdevFactor, | ||
EnforcementPercentage: od.EnforcingSuccessRate, | ||
MinimumHosts: od.SuccessRateMinimumHosts, | ||
RequestVolume: od.SuccessRateRequestVolume, | ||
} | ||
} | ||
|
||
// "If the enforcing_failure_percent field is set to 0 or null, the config | ||
// failure_percent_ejection field will be null and all failure_percent_* | ||
// fields will be ignored." - A50 | ||
var fpe *outlierdetection.FailurePercentageEjection | ||
if od.EnforcingFailurePercentage != 0 { | ||
fpe = &outlierdetection.FailurePercentageEjection{ | ||
Threshold: od.FailurePercentageThreshold, | ||
EnforcementPercentage: od.EnforcingFailurePercentage, | ||
MinimumHosts: od.FailurePercentageMinimumHosts, | ||
RequestVolume: od.FailurePercentageRequestVolume, | ||
} | ||
} | ||
|
||
return outlierdetection.LBConfig{ | ||
Interval: internalserviceconfig.Duration(od.Interval), | ||
BaseEjectionTime: internalserviceconfig.Duration(od.BaseEjectionTime), | ||
MaxEjectionTime: internalserviceconfig.Duration(od.MaxEjectionTime), | ||
MaxEjectionPercent: od.MaxEjectionPercent, | ||
SuccessRateEjection: sre, | ||
FailurePercentageEjection: fpe, | ||
} | ||
} | ||
|
||
// handleWatchUpdate handles a watch update from the xDS Client. Good updates | ||
// lead to clientConn updates being invoked on the underlying cluster_resolver balancer. | ||
func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { | ||
|
@@ -390,28 +358,43 @@ func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { | |
b.logger.Infof("Unexpected cluster type %v when handling update from cluster handler", cu.ClusterType) | ||
} | ||
if envconfig.XDSOutlierDetection { | ||
dms[i].OutlierDetection = outlierDetectionToConfig(cu.OutlierDetection) | ||
odJSON := cu.OutlierDetection | ||
// "In the cds LB policy, if the outlier_detection field is not set in | ||
// the Cluster resource, a "no-op" outlier_detection config will be | ||
// generated in the corresponding DiscoveryMechanism config, with all | ||
// fields unset." - A50 | ||
if odJSON == nil { | ||
// This will pick up top level defaults in Cluster Resolver | ||
// ParseConfig, but sre and fpe will be nil still so still a | ||
// "no-op" config. | ||
odJSON = json.RawMessage(`{}`) | ||
} | ||
Comment on lines
+362
to
+371
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this logic be moved to where we produce the JSON OD config from the proto instead? This is part of converting from xds OD config to OD's JSON config. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, unfortunately not, because the language in the gRFC explicitly states "in the cds lb policy". The issue was I mapped that language to the paragraph following to. We triaged this, and this was the only behavior scoped to the cds lb policy. @murgatroid99 |
||
dms[i].OutlierDetection = odJSON | ||
} | ||
} | ||
|
||
// Prepare Cluster Resolver config, marshal into JSON, and then Parse it to | ||
// get configuration to send downward to Cluster Resolver. | ||
lbCfg := &clusterresolver.LBConfig{ | ||
DiscoveryMechanisms: dms, | ||
XDSLBPolicy: update.lbPolicy, | ||
} | ||
crLBCfgJSON, err := json.Marshal(lbCfg) | ||
if err != nil { | ||
// Shouldn't happen, since we just prepared struct. | ||
b.logger.Errorf("cds_balancer: error marshalling prepared config: %v", lbCfg) | ||
return | ||
} | ||
|
||
bc := &internalserviceconfig.BalancerConfig{} | ||
if err := json.Unmarshal(update.lbPolicy, bc); err != nil { | ||
// This will never occur, valid configuration is emitted from the xDS | ||
// Client. Validity is already checked in the xDS Client, however, this | ||
// double validation is present because Unmarshalling and Validating are | ||
// coupled into one json.Unmarshal operation. We will switch this in | ||
// the future to two separate operations. | ||
b.logger.Errorf("Emitted lbPolicy %s from xDS Client is invalid: %v", update.lbPolicy, err) | ||
var sc serviceconfig.LoadBalancingConfig | ||
if sc, err = b.crParser.ParseConfig(crLBCfgJSON); err != nil { | ||
b.logger.Errorf("cds_balancer: cluster_resolver config generated %v is invalid: %v", crLBCfgJSON, err) | ||
return | ||
} | ||
lbCfg.XDSLBPolicy = bc | ||
|
||
ccState := balancer.ClientConnState{ | ||
ResolverState: xdsclient.SetClient(resolver.State{}, b.xdsClient), | ||
BalancerConfig: lbCfg, | ||
BalancerConfig: sc, | ||
} | ||
if err := b.childLB.UpdateClientConnState(ccState); err != nil { | ||
b.logger.Errorf("Encountered error when sending config {%+v} to child policy: %v", ccState, err) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
its
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:/. Haha switched.