-
Notifications
You must be signed in to change notification settings - Fork 255
NMI retries and ticker for periodic sync reconcile #272
Changes from 2 commits
cbc9374
51ed824
53865c8
a89d143
c358db9
b64e15b
99401bd
62e3502
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -267,7 +267,7 @@ func (c *Client) ListPodIds(podns, podname string) (*[]aadpodid.AzureIdentity, e | |
|
||
var matchedIds []aadpodid.AzureIdentity | ||
for _, v := range azAssignedIDList.(*aadpodid.AzureAssignedIdentityList).Items { | ||
if v.Spec.Pod == podname && v.Spec.PodNamespace == podns { | ||
if v.Spec.Pod == podname && v.Spec.PodNamespace == podns && v.Status.Status == "Assigned" { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make these states a constant ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was my initial plan, but the only confusion is IdentityCreated is an int (EventType) in the v1 pkg. |
||
matchedIds = append(matchedIds, *v.Spec.AzureIdentityRef) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we please add a comment in the next PR indicating the backward compatibility aspects. |
||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
package server | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"errors" | ||
"fmt" | ||
|
@@ -27,6 +28,8 @@ import ( | |
const ( | ||
iptableUpdateTimeIntervalInSeconds = 60 | ||
localhost = "127.0.0.1" | ||
listPodIDsRetryAttempts = 7 | ||
listPodIDsRetryIntervalInSeconds = 5 | ||
) | ||
|
||
// Server encapsulates all of the parameters necessary for starting up | ||
|
@@ -153,6 +156,8 @@ func (fn appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { | |
|
||
func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) { | ||
hostIP := parseRemoteAddr(r.RemoteAddr) | ||
rqClientID, rqResource := parseRequestClientIDAndResource(r) | ||
|
||
if hostIP != localhost { | ||
msg := "request remote address is not from a host" | ||
logger.Error(msg) | ||
|
@@ -167,15 +172,15 @@ func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.R | |
return | ||
} | ||
|
||
podIDs, err := s.KubeClient.ListPodIds(podns, podname) | ||
if err != nil || len(*podIDs) == 0 { | ||
podIDs, err := listPodIDsWithRetry(r.Context(), s.KubeClient, logger, podns, podname, rqClientID, listPodIDsRetryAttempts) | ||
if err != nil { | ||
msg := fmt.Sprintf("no AzureAssignedIdentity found for pod:%s/%s", podns, podname) | ||
logger.Errorf("%s, %+v", msg, err) | ||
http.Error(w, msg, http.StatusForbidden) | ||
http.Error(w, msg, http.StatusNotFound) | ||
return | ||
} | ||
|
||
// filter out if we are in namesoaced mode | ||
// filter out if we are in namespaced mode | ||
filterPodIdentities := []aadpodid.AzureIdentity{} | ||
for _, val := range *(podIDs) { | ||
if s.IsNamespaced || aadpodid.IsNamespacedIdentity(&val) { | ||
|
@@ -193,7 +198,6 @@ func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.R | |
} | ||
} | ||
podIDs = &filterPodIdentities | ||
rqClientID, rqResource := parseRequestClientIDAndResource(r) | ||
token, clientID, err := getTokenForMatchingID(s.KubeClient, logger, rqClientID, rqResource, podIDs) | ||
if err != nil { | ||
logger.Errorf("failed to get service principal token for pod:%s/%s, %+v", podns, podname, err) | ||
|
@@ -220,6 +224,8 @@ func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.R | |
// configured id. | ||
func (s *Server) msiHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) { | ||
podIP := parseRemoteAddr(r.RemoteAddr) | ||
rqClientID, rqResource := parseRequestClientIDAndResource(r) | ||
|
||
if podIP == "" { | ||
msg := "request remote address is empty" | ||
logger.Error(msg) | ||
|
@@ -232,14 +238,15 @@ func (s *Server) msiHandler(logger *log.Entry, w http.ResponseWriter, r *http.Re | |
http.Error(w, err.Error(), http.StatusInternalServerError) | ||
return | ||
} | ||
podIDs, err := s.KubeClient.ListPodIds(podns, podname) | ||
if err != nil || len(*podIDs) == 0 { | ||
|
||
podIDs, err := listPodIDsWithRetry(r.Context(), s.KubeClient, logger, podns, podname, rqClientID, listPodIDsRetryAttempts) | ||
if err != nil { | ||
msg := fmt.Sprintf("no AzureAssignedIdentity found for pod:%s/%s", podns, podname) | ||
logger.Errorf("%s, %+v", msg, err) | ||
http.Error(w, msg, http.StatusForbidden) | ||
http.Error(w, msg, http.StatusNotFound) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hosthandler and mishandler looks identical through most parts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please unify the new code which you are adding into one single function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll add this as a TODO and refactor in the next PR. Have a few questions on why we check filter identities using force namespaced only in 1 handler and not another. We can go over that during the review. |
||
return | ||
} | ||
rqClientID, rqResource := parseRequestClientIDAndResource(r) | ||
|
||
token, _, err := getTokenForMatchingID(s.KubeClient, logger, rqClientID, rqResource, podIDs) | ||
if err != nil { | ||
logger.Errorf("failed to get service principal token for pod:%s/%s, %+v", podns, podname, err) | ||
|
@@ -380,3 +387,36 @@ func handleTermination() { | |
log.Infof("Exiting with %v", exitCode) | ||
os.Exit(exitCode) | ||
} | ||
|
||
func listPodIDsWithRetry(ctx context.Context, kubeClient k8s.Client, logger *log.Entry, podns, podname, rqClientID string, maxAttempts int) (*[]aadpodid.AzureIdentity, error) { | ||
attempt := 0 | ||
var err error | ||
var podIDs *[]aadpodid.AzureIdentity | ||
|
||
for attempt < maxAttempts { | ||
podIDs, err = kubeClient.ListPodIds(podns, podname) | ||
if err == nil && len(*podIDs) != 0 { | ||
if len(rqClientID) == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there are two kinds of error, one is that after a few seconds, we still did not find the AzureAssignedIdentity and another is that it never moved to Assigned. Not able to gather that different from retry attempts and varying time durations for them..? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The retry attempts are not different based on the states. The final desired state is |
||
return podIDs, nil | ||
} | ||
// if client id exists in request, we need to ensure the identity with this client | ||
// exists and is in Assigned state | ||
for _, podID := range *podIDs { | ||
if strings.EqualFold(rqClientID, podID.Spec.ClientID) { | ||
return podIDs, nil | ||
} | ||
} | ||
} | ||
|
||
attempt++ | ||
|
||
select { | ||
case <-time.After(listPodIDsRetryIntervalInSeconds * time.Second): | ||
case <-ctx.Done(): | ||
err = ctx.Err() | ||
return nil, err | ||
} | ||
logger.Warningf("failed to get assigned ids for pod:%s/%s, retrying attempt: %d", podns, podname, attempt) | ||
} | ||
return nil, fmt.Errorf("getting assigned identities for pod %s/%s failed after %d attempts. Error: %v", podns, podname, maxAttempts, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will this error propagate to the caller in the application ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this will be propagated if it fails after retries. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -111,7 +111,7 @@ var _ = Describe("Kubernetes cluster using aad-pod-identity", func() { | |
fmt.Println("\nTearing down the test environment...") | ||
|
||
// Ensure a clean cluster after the end of each test | ||
cmd := exec.Command("kubectl", "delete", "AzureIdentity,AzureIdentityBinding,AzureAssignedIdentity", "--all") | ||
cmd := exec.Command("kubectl", "delete", "AzureIdentity,AzureIdentityBinding", "--all") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing azureassignedidentity from the delete command. It should be deleted by MIC once the identity and binding are removed and it's a good test to ensure we reliably delete the assigned identities. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if that is the exception we should add the check at the end to see if no AzureAssignedIdentities exists and fail the test if it does. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already ensure the length of azureassignedidentities is equal to 0 at the end of |
||
util.PrintCommand(cmd) | ||
_, err := cmd.CombinedOutput() | ||
Expect(err).NotTo(HaveOccurred()) | ||
|
@@ -627,7 +627,7 @@ func setUpIdentityAndDeployment(azureIdentityName, suffix, replicas string) { | |
Expect(err).NotTo(HaveOccurred()) | ||
Expect(ok).To(Equal(true)) | ||
|
||
time.Sleep(30 * time.Second) | ||
//time.Sleep(30 * time.Second) | ||
} | ||
|
||
// validateAzureAssignedIdentity will make sure a given AzureAssignedIdentity has the correct properties | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change to duration var