Skip to content

Commit

Permalink
add status NumServeEndpoints
Browse files Browse the repository at this point in the history
Signed-off-by: Yicheng-Lu-llll <[email protected]>
  • Loading branch information
Yicheng-Lu-llll committed Feb 15, 2024
1 parent 6087689 commit 0b3cce2
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 0 deletions.
3 changes: 3 additions & 0 deletions helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ rules:
- get
- list
- update
- apiGroups:
- ""
resources:
- endpoints
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
7 changes: 7 additions & 0 deletions helm-chart/kuberay-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ rules:
- get
- list
- update
- apiGroups:
- ""
resources:
- endpoints
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
3 changes: 3 additions & 0 deletions ray-operator/apis/ray/v1/rayservice_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ type RayServiceStatuses struct {
PendingServiceStatus RayServiceStatus `json:"pendingServiceStatus,omitempty"`
// ServiceStatus indicates the current RayService status.
ServiceStatus ServiceStatus `json:"serviceStatus,omitempty"`
// NumServeEndpoints indicates the number of Ray Pods that are actively serving or have been selected by the serve service.
// Ray Pods without a proxy actor or those that are unhealthy will not be counted.
NumServeEndpoints int32 `json:"NumServeEndpoints,omitempty"`
// observedGeneration is the most recent generation observed for this RayService. It corresponds to the
// RayService's generation, which is updated on mutation by the API Server.
// +optional
Expand Down
3 changes: 3 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayservices.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions ray-operator/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ rules:
- get
- list
- update
- apiGroups:
- ""
resources:
- endpoints
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
29 changes: 29 additions & 0 deletions ray-operator/controllers/ray/rayservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ func NewRayServiceReconciler(mgr manager.Manager, dashboardClientFunc func() uti
// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list
// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=services/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update
Expand Down Expand Up @@ -212,6 +213,10 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
}
}

if err := r.calculateStatus(ctx, rayServiceInstance, rayClusterInstance); err != nil {
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, err
}

// Final status update for any CR modification.
if r.inconsistentRayServiceStatuses(originalRayServiceInstance.Status, rayServiceInstance.Status) {
rayServiceInstance.Status.LastUpdateTime = &metav1.Time{Time: time.Now()}
Expand All @@ -224,6 +229,25 @@ func (r *RayServiceReconciler) Reconcile(ctx context.Context, request ctrl.Reque
return ctrl.Result{RequeueAfter: ServiceDefaultRequeueDuration}, nil
}

// calculateStatus refreshes the parts of rayServiceInstance.Status that are derived from
// live cluster state. It currently sets Status.NumServeEndpoints to the total number of
// addresses listed across all subsets of the Endpoints object that has the same name and
// namespace as the serve service built from rayServiceInstance and rayClusterInstance.
//
// A missing Endpoints object is not treated as an error: the count is recorded as 0.
// A failure to build the serve service, or any other lookup failure, is returned wrapped
// with context and leaves the status untouched.
func (r *RayServiceReconciler) calculateStatus(ctx context.Context, rayServiceInstance *rayv1.RayService, rayClusterInstance *rayv1.RayCluster) error {
	// The serve service is only built in memory here; it supplies the name and namespace
	// under which the matching Endpoints object is looked up.
	serveSvc, err := common.BuildServeServiceForRayService(ctx, *rayServiceInstance, *rayClusterInstance)
	if err != nil {
		return fmt.Errorf("building serve service to locate its endpoints: %w", err)
	}

	serveEndPoints := &corev1.Endpoints{}
	endpointsKey := client.ObjectKey{Name: serveSvc.Name, Namespace: serveSvc.Namespace}
	// NotFound is tolerated on purpose: serveEndPoints then keeps its zero value, which has
	// no subsets, so the count below comes out as 0.
	if err := r.Get(ctx, endpointsKey, serveEndPoints); err != nil && !errors.IsNotFound(err) {
		// The error is returned rather than logged here; the caller returns it from
		// Reconcile, so logging it as well would report the same failure twice.
		return fmt.Errorf("retrieving endpoints %s/%s: %w", serveSvc.Namespace, serveSvc.Name, err)
	}

	numServeEndpoints := 0
	for _, subset := range serveEndPoints.Subsets {
		// Only Addresses are counted; NotReadyAddresses are deliberately left out.
		numServeEndpoints += len(subset.Addresses)
	}
	rayServiceInstance.Status.NumServeEndpoints = int32(numServeEndpoints)
	return nil
}

// Checks whether the old and new RayServiceStatus are inconsistent by comparing different fields.
// If the only difference between the old and new status is the HealthLastUpdateTime field,
// the status update will not be triggered.
Expand Down Expand Up @@ -285,6 +309,11 @@ func (r *RayServiceReconciler) inconsistentRayServiceStatuses(oldStatus rayv1.Ra
return true
}

if oldStatus.NumServeEndpoints != newStatus.NumServeEndpoints {
r.Log.Info(fmt.Sprintf("inconsistentRayServiceStatus RayService NumServeEndpoints changed from %d to %d", oldStatus.NumServeEndpoints, newStatus.NumServeEndpoints))
return true
}

if r.inconsistentRayServiceStatus(oldStatus.ActiveServiceStatus, newStatus.ActiveServiceStatus) {
r.Log.Info("inconsistentRayServiceStatus RayService ActiveServiceStatus changed")
return true
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0b3cce2

Please sign in to comment.