Skip to content

Commit

Permalink
MAISTRA-2149: Make IOR robust in multiple replicas (maistra#282)
Browse files Browse the repository at this point in the history
In scenarios where multiple replicas of istiod are running,
only one IOR should be in charge of keeping routes in sync
with Istio Gateways. We achieve this by making sure IOR only
runs in the leader replica.

Also, because leader election is not 100% accurate, meaning
that for a small window of time there might be two instances
being the leader - which could lead to duplicated routes
being created if a new gateway is created in that time frame -
we also change the way the Route name is created: Instead of
having a generateName field, we now explicitly pass a name to
the Route object to be created. Being deterministic, it allows
the Route creation to fail when there's already a Route object
with the same name (created by the other leader in that time frame).

Use an exclusive leader ID for IOR

Manual cherrypick of maistra#275
  • Loading branch information
jwendell authored and luksa committed Jun 30, 2021
1 parent d447afa commit c382989
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 3 deletions.
9 changes: 8 additions & 1 deletion pilot/pkg/bootstrap/configcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,14 @@ func (s *Server) initConfigController(args *PilotArgs) error {
s.environment.IstioConfigStore = model.MakeIstioStore(s.configController)

if features.EnableIOR {
ior.Register(s.kubeClient, s.configController, args.Namespace, s.kubeClient.GetMemberRoll())
s.addStartFunc(func(stop <-chan struct{}) error {
go leaderelection.
NewLeaderElection(args.Namespace, args.PodName, leaderelection.IORController, s.kubeClient).
AddRunFunction(func(stop <-chan struct{}) {
ior.Register(s.kubeClient, s.configController, args.Namespace, s.kubeClient.GetMemberRoll(), stop)
}).Run(stop)
return nil
})
}

// Defer starting the controller until after the service is created.
Expand Down
17 changes: 16 additions & 1 deletion pilot/pkg/config/kube/ior/ior.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
var iorLog = log.RegisterScope("ior", "IOR logging", 0)

// Register configures IOR component to respond to Gateway creations and removals
func Register(client kubernetes.Interface, store model.ConfigStoreCache, pilotNamespace string, mrc controller.MemberRollController) {
func Register(client kubernetes.Interface, store model.ConfigStoreCache, pilotNamespace string, mrc controller.MemberRollController, stop <-chan struct{}) {
iorLog.Info("Registering IOR component")

if !isRouteSupported(client) {
Expand All @@ -44,8 +44,23 @@ func Register(client kubernetes.Interface, store model.ConfigStoreCache, pilotNa
return
}

alive := true
go func(stop <-chan struct{}) {
// Stop responding to events when we are no longer a leader.
// Two notes here:
// (1) There's no such method "UnregisterEventHandler()"
// (2) It might take a few seconds for this channel to be closed. So, both pods might be leader for a few seconds.
<-stop
iorLog.Info("This pod is no longer a leader. IOR stopped responding")
alive = false
}(stop)

kind := collections.IstioNetworkingV1Alpha3Gateways.Resource().GroupVersionKind()
store.RegisterEventHandler(kind, func(_, curr config.Config, event model.Event) {
if !alive {
return
}

// encapsulate in goroutine to not slow down processing because of waiting for mutex
go func() {
_, ok := curr.Spec.(*networking.Gateway)
Expand Down
15 changes: 14 additions & 1 deletion pilot/pkg/config/kube/ior/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package ior

import (
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"sync"
Expand Down Expand Up @@ -201,7 +203,7 @@ func (r *route) createRoute(metadata config.Meta, gateway *networking.Gateway, o

nr, err := r.client.Routes(serviceNamespace).Create(context.TODO(), &v1.Route{
ObjectMeta: metav1.ObjectMeta{
GenerateName: fmt.Sprintf("%s-%s-", metadata.Namespace, metadata.Name),
Name: fmt.Sprintf("%s-%s-%s", metadata.Namespace, metadata.Name, hostHash(actualHost)),
Labels: map[string]string{
generatedByLabel: generatedByValue,
gatewayNamespaceLabel: metadata.Namespace,
Expand Down Expand Up @@ -298,3 +300,14 @@ func findConfig(list []config.Config, name, namespace, resourceVersion string) (
}
return config.Config{}, fmt.Errorf("config not found")
}

// hostHash returns a short, deterministic identifier for a host name: the
// hexadecimal encoding of the first 8 bytes of the host's SHA-256 digest,
// i.e. a 16-character string. An empty host is hashed as the literal "star".
func hostHash(name string) string {
	const emptyHostAlias = "star"

	input := name
	if len(input) == 0 {
		// Hash a fixed placeholder instead of the empty string.
		input = emptyHostAlias
	}

	digest := sha256.Sum256([]byte(input))
	prefix := digest[:8]
	return hex.EncodeToString(prefix)
}
1 change: 1 addition & 0 deletions pilot/pkg/leaderelection/leaderelection.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const (
IngressController = "istio-leader"
StatusController = "istio-status-leader"
AnalyzeController = "istio-analyze-leader"
IORController = "ior-leader"
)

type LeaderElection struct {
Expand Down

0 comments on commit c382989

Please sign in to comment.