From 791ea372a72900f47ca2e1166751c5219c83d8c0 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 17 Apr 2024 09:28:34 -0700 Subject: [PATCH] [Bug] KubeRay operator failed to watch endpoint (#2080) --- .../templates/multiple_namespaces_role.yaml | 1 + helm-chart/kuberay-operator/templates/role.yaml | 1 + ray-operator/config/rbac/role.yaml | 1 + .../controllers/ray/rayservice_controller.go | 2 +- tests/framework/prototype.py | 13 ++++++++----- tests/test_sample_rayservice_yamls.py | 6 +----- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml b/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml index 4925893bce..1f33c94199 100644 --- a/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml +++ b/helm-chart/kuberay-operator/templates/multiple_namespaces_role.yaml @@ -39,6 +39,7 @@ rules: verbs: - get - list + - watch - apiGroups: - "" resources: diff --git a/helm-chart/kuberay-operator/templates/role.yaml b/helm-chart/kuberay-operator/templates/role.yaml index cc1e3bf375..c6b12f5851 100644 --- a/helm-chart/kuberay-operator/templates/role.yaml +++ b/helm-chart/kuberay-operator/templates/role.yaml @@ -35,6 +35,7 @@ rules: verbs: - get - list + - watch - apiGroups: - "" resources: diff --git a/ray-operator/config/rbac/role.yaml b/ray-operator/config/rbac/role.yaml index c15fdd9bfc..ad58ee59ba 100644 --- a/ray-operator/config/rbac/role.yaml +++ b/ray-operator/config/rbac/role.yaml @@ -32,6 +32,7 @@ rules: verbs: - get - list + - watch - apiGroups: - "" resources: diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go index fbf08d5b72..eb98498ba4 100644 --- a/ray-operator/controllers/ray/rayservice_controller.go +++ b/ray-operator/controllers/ray/rayservice_controller.go @@ -82,7 +82,7 @@ func NewRayServiceReconciler(ctx context.Context, mgr manager.Manager, dashboard // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list +// +kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services/status,verbs=get;update;patch // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;create;update diff --git a/tests/framework/prototype.py b/tests/framework/prototype.py index 2e9cb53c11..61c375203d 100644 --- a/tests/framework/prototype.py +++ b/tests/framework/prototype.py @@ -1,9 +1,9 @@ """Configuration test framework for KubeRay""" import json -import jsonpatch -from typing import Dict, List, Optional -import unittest import time +import unittest +from typing import Dict, List, Optional +import jsonpatch from framework.utils import ( create_custom_object, @@ -306,17 +306,20 @@ def assert_rule(self, custom_resource, cr_namespace): name=custom_resource["metadata"]["name"], namespace=cr_namespace, path=query.get("path").rstrip("/"), - json=json.dumps(query["json_args"]), + json=json.dumps(query["json_args"]) ) + if self.start_in_background: - shell_subprocess_run(f"{cmd} &", hide_output=True) + shell_subprocess_run(f"{cmd} &", hide_output=False) else: output = shell_subprocess_check_output(cmd) + logger.info("curl output: %s", output.decode('utf-8')) if hasattr(query.get("expected_output"), "__iter__"): assert output.decode('utf-8') in query["expected_output"] else: assert output.decode('utf-8') == query["expected_output"] + time.sleep(1) class AutoscaleRule(Rule): def __init__( diff --git a/tests/test_sample_rayservice_yamls.py b/tests/test_sample_rayservice_yamls.py index e99f4711b7..ee377d4d21 100644 --- a/tests/test_sample_rayservice_yamls.py +++ b/tests/test_sample_rayservice_yamls.py @@ -1,12 +1,11 @@ ''' Test sample RayService YAML files to catch invalid and outdated ones. ''' from copy import deepcopy -from kubernetes import client import logging import pytest import sys from tempfile import NamedTemporaryFile import time -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional import yaml from framework.prototype import ( @@ -15,10 +14,7 @@ EasyJobRule, CurlServiceRule, AutoscaleRule, - get_expected_head_pods, - get_expected_worker_pods, show_cluster_info, - check_pod_running, ) from framework.utils import (