Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix clean up of old attributes when containers are not restarting #271

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions controllers/galera_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,16 @@ func getRunningPodsMissingGcomm(ctx context.Context, pods []corev1.Pod, instance
return
}

// getGaleraContainerID looks up the status entry of the container named
// "galera" among the pod's reported container statuses. It returns true
// together with that entry's ContainerID when such an entry exists, and
// false with an empty string when no entry carries that name.
func getGaleraContainerID(pod *corev1.Pod) (found bool, CID string) {
	statuses := pod.Status.ContainerStatuses
	for i := range statuses {
		// Skip every status entry that does not belong to the galera container.
		if statuses[i].Name != "galera" {
			continue
		}
		return true, statuses[i].ContainerID
	}
	return false, ""
}

// isGaleraContainerStartedAndWaiting checks whether the galera container is waiting for a gcomm_uri file
func isGaleraContainerStartedAndWaiting(ctx context.Context, pod *corev1.Pod, instance *mariadbv1.Galera, h *helper.Helper, config *rest.Config) bool {
waiting := false
Expand Down Expand Up @@ -282,14 +292,14 @@ func assertPodsAttributesValidity(helper *helper.Helper, instance *mariadbv1.Gal
// A node can have various attributes depending on its known state.
// A ContainerID attribute is only present if the node is being started.
attrCID := instance.Status.Attributes[pod.Name].ContainerID
podCID := pod.Status.ContainerStatuses[0].ContainerID
if attrCID != "" && attrCID != podCID {
containerFound, podCID := getGaleraContainerID(&pod)
if !containerFound || (attrCID != "" && attrCID != podCID) {
// This gcomm URI was pushed in a pod which was restarted
// before the attribute got cleared, which means the pod
// failed to start galera. Clear the attribute here, and
// reprobe the pod's state in the next reconcile loop
clearPodAttributes(instance, pod.Name)
util.LogForObject(helper, "Pod restarted while galera was starting", instance, "pod", pod.Name, "current pod ID", podCID, "recorded ID", attrCID)
util.LogForObject(helper, "Pod restarted while galera was starting", instance, "pod", pod.Name, "recorded ID", attrCID)
}
}
}
Expand Down
Loading