Skip to content

Commit

Permalink
[Refactor][RayCluster] Add RayClusterRedisPodAssociationOptions and H…
Browse files Browse the repository at this point in the history
…eadServiceAssociationOptions
  • Loading branch information
evalaiyc98 committed Apr 5, 2024
1 parent 6359d3c commit acb33b5
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 24 deletions.
22 changes: 22 additions & 0 deletions ray-operator/controllers/ray/common/association.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ func RayClusterWorkerPodsAssociationOptions(instance *rayv1.RayCluster) Associat
}
}

func RayClusterRedisPodAssociationOptions(instance *rayv1.RayCluster) AssociationOptions {
return AssociationOptions{
client.InNamespace(instance.Namespace),
client.MatchingLabels{
utils.RayClusterLabelKey: instance.Name,
utils.RayNodeTypeLabelKey: string(rayv1.RedisCleanupNode),
},
}
}

func RayClusterGroupPodsAssociationOptions(instance *rayv1.RayCluster, group string) AssociationOptions {
return AssociationOptions{
client.InNamespace(instance.Namespace),
Expand All @@ -93,6 +103,18 @@ func RayClusterAllPodsAssociationOptions(instance *rayv1.RayCluster) Association
}
}

func HeadServiceAssociationOptions(instance *rayv1.RayCluster) AssociationOptions {
return AssociationOptions{
client.MatchingLabels{
utils.RayClusterLabelKey: instance.Name,
utils.RayNodeTypeLabelKey: string(rayv1.HeadNode),
utils.RayIDLabelKey: utils.CheckLabel(utils.GenerateIdentifier(instance.Name, rayv1.HeadNode)),
utils.KubernetesApplicationNameLabelKey: utils.ApplicationName,
utils.KubernetesCreatedByLabelKey: utils.ComponentName,
},
}
}

func RayServiceServeServiceNamespacedName(rayService *rayv1.RayService) types.NamespacedName {
if rayService.Spec.ServeService != nil && rayService.Spec.ServeService.Name != "" {
return types.NamespacedName{
Expand Down
37 changes: 13 additions & 24 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,8 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request
}

// We can start the Redis cleanup process now because the head Pod has been terminated.
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.RedisCleanupNode)}
redisCleanupJobs := batchv1.JobList{}
if err := r.List(ctx, &redisCleanupJobs, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &redisCleanupJobs, common.RayClusterRedisPodAssociationOptions(instance).ToListOptions()...); err != nil {
return ctrl.Result{RequeueAfter: DefaultRequeueDuration}, err
}

Expand Down Expand Up @@ -438,8 +437,7 @@ func (r *RayClusterReconciler) reconcileIngress(ctx context.Context, instance *r
func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, instance *rayv1.RayCluster) error {
logger := ctrl.LoggerFrom(ctx)
headRoutes := routev1.RouteList{}
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name}
if err := r.List(ctx, &headRoutes, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &headRoutes, common.RayClusterAllPodsAssociationOptions(instance).ToListOptions()...); err != nil {
logger.Error(err, "Route Listing error!", "Route.Error", err)
return err
}
Expand Down Expand Up @@ -473,8 +471,7 @@ func (r *RayClusterReconciler) reconcileRouteOpenShift(ctx context.Context, inst
func (r *RayClusterReconciler) reconcileIngressKubernetes(ctx context.Context, instance *rayv1.RayCluster) error {
logger := ctrl.LoggerFrom(ctx)
headIngresses := networkingv1.IngressList{}
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name}
if err := r.List(ctx, &headIngresses, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &headIngresses, common.RayClusterAllPodsAssociationOptions(instance).ToListOptions()...); err != nil {
return err
}

Expand Down Expand Up @@ -506,9 +503,8 @@ func (r *RayClusterReconciler) reconcileIngressKubernetes(ctx context.Context, i
func (r *RayClusterReconciler) reconcileHeadService(ctx context.Context, instance *rayv1.RayCluster) error {
logger := ctrl.LoggerFrom(ctx)
services := corev1.ServiceList{}
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)}

if err := r.List(ctx, &services, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &services, common.RayClusterHeadPodsAssociationOptions(instance).ToListOptions()...); err != nil {
return err
}

Expand Down Expand Up @@ -1249,8 +1245,7 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra
newInstance.Status.ObservedGeneration = newInstance.ObjectMeta.Generation

runtimePods := corev1.PodList{}
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: newInstance.Name}
if err := r.List(ctx, &runtimePods, client.InNamespace(newInstance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &runtimePods, common.RayClusterAllPodsAssociationOptions(newInstance).ToListOptions()...); err != nil {
return nil, err
}

Expand Down Expand Up @@ -1302,30 +1297,28 @@ func (r *RayClusterReconciler) getHeadPodIP(ctx context.Context, instance *rayv1
logger := ctrl.LoggerFrom(ctx)

runtimePods := corev1.PodList{}
filterLabels := client.MatchingLabels{utils.RayClusterLabelKey: instance.Name, utils.RayNodeTypeLabelKey: string(rayv1.HeadNode)}
if err := r.List(ctx, &runtimePods, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &runtimePods, common.RayClusterHeadPodsAssociationOptions(instance).ToListOptions()...); err != nil {
logger.Error(err, "Failed to list pods while getting head pod ip.")
return "", err
}
if len(runtimePods.Items) != 1 {
logger.Info(fmt.Sprintf("Found %d head pods. cluster name %s, filter labels %v", len(runtimePods.Items), instance.Name, filterLabels))
logger.Info(fmt.Sprintf("Found %d head pods. cluster name %s, filter labels %v", len(runtimePods.Items), instance.Name, common.RayClusterHeadPodsAssociationOptions(instance).ToListOptions()))
return "", nil
}
return runtimePods.Items[0].Status.PodIP, nil
}

func (r *RayClusterReconciler) getHeadServiceIP(ctx context.Context, instance *rayv1.RayCluster) (string, error) {
runtimeServices := corev1.ServiceList{}
filterLabels := client.MatchingLabels(common.HeadServiceLabels(*instance))
if err := r.List(ctx, &runtimeServices, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &runtimeServices, common.HeadServiceAssociationOptions(instance).ToListOptions()...); err != nil {
return "", err
}
if len(runtimeServices.Items) < 1 {
return "", fmt.Errorf("unable to find head service. cluster name %s, filter labels %v", instance.Name, filterLabels)
return "", fmt.Errorf("unable to find head service. cluster name %s, filter labels %v", instance.Name, common.HeadServiceAssociationOptions(instance).ToListOptions())
} else if len(runtimeServices.Items) > 1 {
return "", fmt.Errorf("found multiple head services. cluster name %s, filter labels %v", instance.Name, filterLabels)
return "", fmt.Errorf("found multiple head services. cluster name %s, filter labels %v", instance.Name, common.HeadServiceAssociationOptions(instance).ToListOptions())
} else if runtimeServices.Items[0].Spec.ClusterIP == "" {
return "", fmt.Errorf("head service IP is empty. cluster name %s, filter labels %v", instance.Name, filterLabels)
return "", fmt.Errorf("head service IP is empty. cluster name %s, filter labels %v", instance.Name, common.HeadServiceAssociationOptions(instance).ToListOptions())
}

return runtimeServices.Items[0].Spec.ClusterIP, nil
Expand All @@ -1337,11 +1330,7 @@ func (r *RayClusterReconciler) updateEndpoints(ctx context.Context, instance *ra
// We assume we can find the right one by filtering Services with appropriate label selectors
// and picking the first one. We may need to select by name in the future if the Service naming is stable.
rayHeadSvc := corev1.ServiceList{}
filterLabels := client.MatchingLabels{
utils.RayClusterLabelKey: instance.Name,
utils.RayNodeTypeLabelKey: "head",
}
if err := r.List(ctx, &rayHeadSvc, client.InNamespace(instance.Namespace), filterLabels); err != nil {
if err := r.List(ctx, &rayHeadSvc, common.RayClusterHeadPodsAssociationOptions(instance).ToListOptions()...); err != nil {
return err
}

Expand All @@ -1366,7 +1355,7 @@ func (r *RayClusterReconciler) updateEndpoints(ctx context.Context, instance *ra
}
}
} else {
logger.Info("updateEndpoints", "unable to find a Service for this RayCluster. Not adding RayCluster status.endpoints", instance.Name, "Service selectors", filterLabels)
logger.Info("updateEndpoints", "unable to find a Service for this RayCluster. Not adding RayCluster status.endpoints", instance.Name, "Service selectors", common.RayClusterHeadPodsAssociationOptions(instance).ToListOptions())
}

return nil
Expand Down

0 comments on commit acb33b5

Please sign in to comment.