/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/net"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/retry"
	"k8s.io/kubernetes/pkg/api/legacyscheme"
	api "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/registry/core/rangeallocation"
	"k8s.io/kubernetes/pkg/registry/core/service/portallocator"
)

// See ipallocator/controller/repair.go; this is a copy for ports.
type Repair struct {
	interval      time.Duration
	serviceClient corev1client.ServicesGetter
	portRange     net.PortRange
	alloc         rangeallocation.RangeRegistry
	leaks         map[int]int // counter per leaked port
	recorder      record.EventRecorder
}

// How many times we need to detect a leak before we clean up. This is to
// avoid races between allocating a port and using it.
const numRepairsBeforeLeakCleanup = 3

// NewRepair creates a controller that periodically ensures that all ports are uniquely allocated across the cluster
// and generates informational warnings for a cluster that is not in sync.
func NewRepair(interval time.Duration, serviceClient corev1client.ServicesGetter, eventClient corev1client.EventsGetter, portRange net.PortRange, alloc rangeallocation.RangeRegistry) *Repair {
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartRecordingToSink(&corev1client.EventSinkImpl{Interface: eventClient.Events("")})
	recorder := eventBroadcaster.NewRecorder(legacyscheme.Scheme, corev1.EventSource{Component: "portallocator-repair-controller"})

	return &Repair{
		interval:      interval,
		serviceClient: serviceClient,
		portRange:     portRange,
		alloc:         alloc,
		leaks:         map[int]int{},
		recorder:      recorder,
	}
}
// RunUntil starts the controller until the provided ch is closed.
func (c *Repair) RunUntil(ch chan struct{}) {
	wait.Until(func() {
		if err := c.RunOnce(); err != nil {
			runtime.HandleError(err)
		}
	}, c.interval, ch)
}
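// A minimal sketch of how a caller might wire this controller up, assuming a
// configured kubernetes.Interface named client, a parsed node-port range
// nodePortRange, and a rangeallocation.RangeRegistry named portRegistry
// already exist; the names are illustrative and this is not the actual
// kube-apiserver wiring.
//
//	repair := NewRepair(
//		1*time.Minute,   // how often to re-run a repair pass
//		client.CoreV1(), // ServicesGetter
//		client.CoreV1(), // EventsGetter
//		nodePortRange,   // e.g. the --service-node-port-range value
//		portRegistry,    // persisted allocation snapshot
//	)
//	stopCh := make(chan struct{})
//	go repair.RunUntil(stopCh)
//	// ... close(stopCh) to stop the repair loop.
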
// RunOnce verifies the state of the port allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) RunOnce() error {
	return retry.RetryOnConflict(retry.DefaultBackoff, c.runOnce)
}
// runOnce verifies the state of the port allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) runOnce() error {
	// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
	// or if they are executed against different leaders,
	// the ordering guarantee required to ensure no port is allocated twice is violated.
	// ListServices must return a ResourceVersion higher than the etcd index Get triggers,
	// and the release code must not release services that have had ports allocated but not yet been created
	// See #8295

	// If etcd server is not running we should wait for some time and fail only then. This is particularly
	// important when we start apiserver and etcd at the same time.
	var snapshot *api.RangeAllocation
	err := wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
		var err error
		snapshot, err = c.alloc.Get()
		return err == nil, err
	})
	if err != nil {
		return fmt.Errorf("unable to refresh the port allocations: %v", err)
	}
	// If not yet initialized.
	if snapshot.Range == "" {
		snapshot.Range = c.portRange.String()
	}
	// Create an allocator because it is easy to use.
	stored, err := portallocator.NewFromSnapshot(snapshot)
	if err != nil {
		return fmt.Errorf("unable to rebuild allocator from snapshot: %v", err)
	}

	// We explicitly send no resource version, since the resource version
	// of 'snapshot' is from a different collection, it's not comparable to
	// the service collection. The caching layer keeps per-collection RVs,
	// and this is proper, since in theory the collections could be hosted
	// in separate etcd (or even non-etcd) instances.
	list, err := c.serviceClient.Services(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
	if err != nil {
		return fmt.Errorf("unable to refresh the port block: %v", err)
	}

	rebuilt, err := portallocator.NewPortAllocator(c.portRange)
	if err != nil {
		return fmt.Errorf("unable to create port allocator: %v", err)
	}
	// Check every Service's ports, and rebuild the state as we think it should be.
	for i := range list.Items {
		svc := &list.Items[i]
		ports := collectServiceNodePorts(svc)
		if len(ports) == 0 {
			continue
		}

		for _, port := range ports {
			switch err := rebuilt.Allocate(port); err {
			case nil:
				if stored.Has(port) {
					// remove it from the old set, so we can find leaks
					stored.Release(port)
				} else {
					// doesn't seem to be allocated
					c.recorder.Eventf(svc, corev1.EventTypeWarning, "PortNotAllocated", "Port %d is not allocated; repairing", port)
					runtime.HandleError(fmt.Errorf("the node port %d for service %s/%s is not allocated; repairing", port, svc.Name, svc.Namespace))
				}
				delete(c.leaks, port) // it is used, so it can't be leaked
			case portallocator.ErrAllocated:
				// port is duplicate, reallocate
				c.recorder.Eventf(svc, corev1.EventTypeWarning, "PortAlreadyAllocated", "Port %d was assigned to multiple services; please recreate service", port)
				runtime.HandleError(fmt.Errorf("the node port %d for service %s/%s was assigned to multiple services; please recreate", port, svc.Name, svc.Namespace))
			case err.(*portallocator.ErrNotInRange):
				// port is out of range, reallocate
				c.recorder.Eventf(svc, corev1.EventTypeWarning, "PortOutOfRange", "Port %d is not within the port range %s; please recreate service", port, c.portRange)
				runtime.HandleError(fmt.Errorf("the port %d for service %s/%s is not within the port range %s; please recreate", port, svc.Name, svc.Namespace, c.portRange))
			case portallocator.ErrFull:
				// somehow we are out of ports
				c.recorder.Eventf(svc, corev1.EventTypeWarning, "PortRangeFull", "Port range %s is full; you must widen the port range in order to create new services", c.portRange)
				return fmt.Errorf("the port range %s is full; you must widen the port range in order to create new services", c.portRange)
			default:
				c.recorder.Eventf(svc, corev1.EventTypeWarning, "UnknownError", "Unable to allocate port %d due to an unknown error", port)
				return fmt.Errorf("unable to allocate port %d for service %s/%s due to an unknown error, exiting: %v", port, svc.Name, svc.Namespace, err)
			}
		}
	}
	// Check for ports that are left in the old set. They appear to have been leaked.
	stored.ForEach(func(port int) {
		count, found := c.leaks[port]
		switch {
		case !found:
			// flag it to be cleaned up after any races (hopefully) are gone
			runtime.HandleError(fmt.Errorf("the node port %d may have leaked: flagging for later clean up", port))
			count = numRepairsBeforeLeakCleanup - 1
			fallthrough
		case count > 0:
			// pretend it is still in use until count expires
			c.leaks[port] = count - 1
			if err := rebuilt.Allocate(port); err != nil {
				runtime.HandleError(fmt.Errorf("the node port %d may have leaked, but can not be allocated: %v", port, err))
			}
		default:
			// do not add it to the rebuilt set, which means it will be available for reuse
			runtime.HandleError(fmt.Errorf("the node port %d appears to have leaked: cleaning up", port))
		}
	})
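	// To illustrate the bookkeeping above (a hypothetical trace, not part of
	// the original file), with numRepairsBeforeLeakCleanup = 3 a port that no
	// Service references any longer moves through the repair passes like this:
	//
	//	pass 1: first detection  -> leaks[port] = 1, port kept allocated
	//	pass 2: second detection -> leaks[port] = 0, port kept allocated
	//	pass 3: third detection  -> port is not re-added and becomes free again
	//	any pass where a Service uses the port -> delete(c.leaks, port) resets it
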
	// Blast the rebuilt state into storage.
	if err := rebuilt.Snapshot(snapshot); err != nil {
		return fmt.Errorf("unable to snapshot the updated port allocations: %v", err)
	}

	if err := c.alloc.CreateOrUpdate(snapshot); err != nil {
		if errors.IsConflict(err) {
			// Return conflicts unchanged so RetryOnConflict in RunOnce can
			// retry the repair against fresh data.
			return err
		}
		return fmt.Errorf("unable to persist the updated port allocations: %v", err)
	}
	return nil
}
// collectServiceNodePorts returns nodePorts specified in the Service.
// Please note that:
// 1. same nodePort with *same* protocol will be duplicated as it is
// 2. same nodePort with *different* protocol will be deduplicated
func collectServiceNodePorts(service *corev1.Service) []int {
	var servicePorts []int
	// map from nodePort to set of protocols
	seen := make(map[int]sets.String)

	for _, port := range service.Spec.Ports {
		nodePort := int(port.NodePort)
		if nodePort == 0 {
			continue
		}
		proto := string(port.Protocol)
		s := seen[nodePort]
		if s == nil { // have not seen this nodePort before
			s = sets.NewString(proto)
			servicePorts = append(servicePorts, nodePort)
		} else if s.Has(proto) { // same nodePort with same protocol
			servicePorts = append(servicePorts, nodePort)
		} else { // same nodePort with different protocol
			s.Insert(proto)
		}
		seen[nodePort] = s
	}

	healthPort := int(service.Spec.HealthCheckNodePort)
	if healthPort != 0 {
		s := seen[healthPort]
		// TODO: is it safe to assume the protocol is always TCP?
		if s == nil || s.Has(string(corev1.ProtocolTCP)) {
			servicePorts = append(servicePorts, healthPort)
		}
	}

	return servicePorts
}
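// An illustrative walk-through of the deduplication rules above (hypothetical
// port values, not taken from the original file): for a Service whose
// spec.ports declare
//
//	{NodePort: 30080, Protocol: TCP}
//	{NodePort: 30080, Protocol: TCP} // same nodePort, same protocol
//	{NodePort: 30080, Protocol: UDP} // same nodePort, different protocol
//	{NodePort: 30090, Protocol: TCP}
//
// collectServiceNodePorts returns [30080, 30080, 30090]: the TCP/TCP repeat is
// kept (rule 1) so the repair loop reports it as a duplicate allocation, while
// the TCP/UDP pair collapses to a single entry (rule 2) so a nodePort shared
// across protocols is only allocated once.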