-
Notifications
You must be signed in to change notification settings - Fork 326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
remove health checks controller and use endpoints controller for health checks #472
Changes from 3 commits
ec10114
0139f80
c4fda60
76cff69
e06fa33
bd34924
4f2b3bc
2d23557
f1a3cad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ import ( | |
"fmt" | ||
"strings" | ||
|
||
mapset "github.com/deckarep/golang-set" | ||
"github.com/deckarep/golang-set" | ||
"github.com/go-logr/logr" | ||
"github.com/hashicorp/consul-k8s/consul" | ||
"github.com/hashicorp/consul/api" | ||
|
@@ -25,9 +25,15 @@ import ( | |
) | ||
|
||
const ( | ||
MetaKeyPodName = "pod-name" | ||
MetaKeyKubeServiceName = "k8s-service-name" | ||
MetaKeyKubeNS = "k8s-namespace" | ||
MetaKeyPodName = "pod-name" | ||
MetaKeyKubeServiceName = "k8s-service-name" | ||
MetaKeyKubeNS = "k8s-namespace" | ||
kubernetesSuccessReasonMsg = "Kubernetes health checks passing" | ||
podPendingReasonMsg = "Pod is pending" | ||
|
||
// labelInject is the label which is applied by the connect-inject webhook to all pods. | ||
// This is the key controllers will use on the label filter for its lister, watcher and reconciler. | ||
labelInject = "consul.hashicorp.com/connect-inject-status" | ||
kschoche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
type EndpointsController struct { | ||
|
@@ -133,6 +139,18 @@ func (r *EndpointsController) Reconcile(ctx context.Context, req ctrl.Request) ( | |
r.Log.Error(err, "failed to register proxy service with Consul", "consul-proxy-service-name", proxyServiceRegistration.Name) | ||
return ctrl.Result{}, err | ||
} | ||
|
||
// Update the TTL health check for the service. | ||
// This is required because ServiceRegister() does not update the TTL if the service already exists. | ||
r.Log.Info("updating ttl health check", "service", proxyServiceRegistration.Name) | ||
kschoche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
status, reason, err := getReadyStatusAndReason(&pod) | ||
kschoche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
err = client.Agent().UpdateTTL(getConsulHealthCheckID(&pod, serviceRegistration.ID), reason, status) | ||
kschoche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -210,13 +228,26 @@ func (r *EndpointsController) createServiceRegistrations(pod corev1.Pod, service | |
tags = append(tags, strings.Split(raw, ",")...) | ||
} | ||
|
||
status, _, err := getReadyStatusAndReason(&pod) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we dont use the "reason" field here because setting the Message field in the check on registration causes the Notes/Output field to get jumbled the next time we issue an update which occurs a couple ms later. It's fine and only has the effect of there being no Notes field until the UpdateTTL() is issues a couple ms later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sounds good. What exactly gets jumbled here though just for my own understanding? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The Notes field gets set and you cannot override it with later update.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a really good note and I think it would be good as a comment in the code since I'm sure readers would wonder! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, I'll add a comment about it! |
||
if err != nil { | ||
return nil, nil, err | ||
} | ||
|
||
service := &api.AgentServiceRegistration{ | ||
ID: serviceID, | ||
Name: serviceName, | ||
Port: servicePort, | ||
Address: pod.Status.PodIP, | ||
Meta: meta, | ||
Namespace: "", // TODO: namespace support | ||
Check: &api.AgentServiceCheck{ | ||
CheckID: getConsulHealthCheckID(&pod, serviceID), | ||
Name: "Kubernetes Health Check", | ||
TTL: "100000h", | ||
Status: status, | ||
SuccessBeforePassing: 1, | ||
FailuresBeforeCritical: 1, | ||
}, | ||
} | ||
if len(tags) > 0 { | ||
service.Tags = tags | ||
|
@@ -272,6 +303,39 @@ func (r *EndpointsController) createServiceRegistrations(pod corev1.Pod, service | |
return service, proxyService, nil | ||
} | ||
|
||
// getConsulHealthCheckID deterministically generates a health check ID that will be unique to the Agent | ||
// where the health check is registered and deregistered. | ||
func getConsulHealthCheckID(pod *corev1.Pod, serviceID string) string { | ||
return fmt.Sprintf("%s/%s/kubernetes-health-check", pod.Namespace, serviceID) | ||
} | ||
|
||
// getReadyStatusAndReason returns the formatted status string to pass to Consul based on the | ||
// ready state of the pod along with the reason message which will be passed into the Notes | ||
// field of the Consul health check. | ||
func getReadyStatusAndReason(pod *corev1.Pod) (string, string, error) { | ||
// A pod might be pending if the init containers have run but the non-init | ||
// containers haven't reached running state. In this case we set a failing health | ||
// check so the pod doesn't receive traffic before it's ready. | ||
if pod.Status.Phase == corev1.PodPending { | ||
return api.HealthCritical, podPendingReasonMsg, nil | ||
} | ||
|
||
for _, cond := range pod.Status.Conditions { | ||
var consulStatus, reason string | ||
if cond.Type == corev1.PodReady { | ||
if cond.Status != corev1.ConditionTrue { | ||
consulStatus = api.HealthCritical | ||
reason = cond.Message | ||
} else { | ||
consulStatus = api.HealthPassing | ||
reason = kubernetesSuccessReasonMsg | ||
} | ||
return consulStatus, reason, nil | ||
} | ||
} | ||
return "", "", fmt.Errorf("no ready status for pod: %s", pod.Name) | ||
} | ||
|
||
// deregisterServiceOnAllAgents queries all agents for service instances that have the metadata | ||
// "k8s-service-name"=k8sSvcName and "k8s-namespace"=k8sSvcNamespace. The k8s service name may or may not match the | ||
// consul service name, but the k8s service name will always match the metadata on the Consul service | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This just got pushed into master and will go away when we rebase feature-tproxy next.