Skip to content

Commit

Permalink
[Hotfix] Increase the timeout of the ProxyActor health check (#2082)
Browse the repository at this point in the history
  • Loading branch information
kevin85421 authored Apr 17, 2024
1 parent 3738f78 commit 981c943
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 13 deletions.
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/rayservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,7 @@ func (r *RayServiceReconciler) labelHeadPodForServeStatus(ctx context.Context, r
originalLabels[key] = value
}

if httpProxyClient.CheckHealth() == nil {
if err = httpProxyClient.CheckProxyActorHealth(ctx); err == nil {
headPod.Labels[utils.RayClusterServingServiceLabelKey] = utils.EnableRayClusterServingServiceTrue
} else {
headPod.Labels[utils.RayClusterServingServiceLabelKey] = utils.EnableRayClusterServingServiceFalse
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package utils

import (
"context"
"fmt"
"net/http"
"time"
Expand All @@ -21,7 +22,7 @@ func (r *FakeRayHttpProxyClient) SetHostIp(hostIp string, port int) {
r.httpProxyURL = fmt.Sprintf("http://%s:%d", hostIp, port)
}

func (r *FakeRayHttpProxyClient) CheckHealth() error {
func (r *FakeRayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error {
// TODO: test check return error cases.
// Always return successful.
return nil
Expand Down
25 changes: 14 additions & 11 deletions ray-operator/controllers/ray/utils/httpproxy_httpclient.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
package utils

import (
"context"
"fmt"
"io"
"net/http"
"time"

ctrl "sigs.k8s.io/controller-runtime"
)

// RayHttpProxyClientInterface is the set of operations a Ray HTTP proxy client
// exposes: initializing the client, running a health check, and setting the
// host IP and port to talk to.
// NOTE(review): this listing is a flattened diff, so the two health-check
// methods below are presumably the before/after forms of a single method
// (CheckHealth renamed to CheckProxyActorHealth with a context parameter)
// rather than two coexisting methods — confirm against the real file.
type RayHttpProxyClientInterface interface {
// InitClient prepares the client for use.
InitClient()
// CheckHealth returns an error when the health check does not succeed.
CheckHealth() error
// CheckProxyActorHealth is the context-aware health check; it returns an
// error when the check does not succeed.
CheckProxyActorHealth(ctx context.Context) error
// SetHostIp records the host IP and port the client should target.
SetHostIp(hostIp string, port int)
}

Expand All @@ -24,29 +27,29 @@ type RayHttpProxyClient struct {

// InitClient sets r.client to an http.Client configured with a request timeout.
// NOTE(review): this block comes from a flattened diff, so both the old and the
// new Timeout line are present. The commit title says the health-check timeout
// was increased, so 20ms is presumably the removed value and 2s the one that
// remains in the file — confirm against the real source.
func (r *RayHttpProxyClient) InitClient() {
r.client = http.Client{
Timeout: 20 * time.Millisecond,
Timeout: 2 * time.Second,
}
}

// SetHostIp stores the base URL of the proxy, built from the given host IP and
// port as "http://<hostIp>:<port>/" (note the trailing slash), in r.httpProxyURL.
func (r *RayHttpProxyClient) SetHostIp(hostIp string, port int) {
	baseURL := fmt.Sprintf("http://%s:%d/", hostIp, port)
	r.httpProxyURL = baseURL
}

func (r *RayHttpProxyClient) CheckHealth() error {
req, err := http.NewRequest("GET", r.httpProxyURL+RayServeProxyHealthPath, nil)
if err != nil {
return err
}

resp, err := r.client.Do(req)
// CheckProxyActorHealth checks the health status of the Ray Serve proxy actor.
func (r *RayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error {
logger := ctrl.LoggerFrom(ctx)
resp, err := r.client.Get(r.httpProxyURL + RayServeProxyHealthPath)
if err != nil {
logger.Error(err, "CheckProxyActorHealth fails.")
return err
}
defer resp.Body.Close()

body, _ := io.ReadAll(resp.Body)
if resp.StatusCode < 200 || resp.StatusCode > 299 {
return fmt.Errorf("RayHttpProxyClient CheckHealth fail: %s %s", resp.Status, string(body))
if resp.StatusCode != 200 {
err := fmt.Errorf("CheckProxyActorHealth fails: Status code is not 200")
logger.Error(err, "CheckProxyActorHealth fails.", "status code", resp.StatusCode, "status", resp.Status, "body", string(body))
return err
}

return nil
Expand Down

0 comments on commit 981c943

Please sign in to comment.