Fix the replica count for kserve

lucferbux committed Jan 9, 2024
1 parent c9f219a · commit a77d76f

Showing 9 changed files with 122 additions and 5 deletions.
2 changes: 1 addition & 1 deletion backend/src/types.ts
@@ -892,7 +892,7 @@ export type ServingRuntime = K8sResourceCommon & {
       volumeMounts?: VolumeMount[];
     }[];
     supportedModelFormats: SupportedModelFormats[];
-    replicas: number;
+    replicas?: number;
     volumes?: Volume[];
   };
 };
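With `replicas` now optional on the backend `ServingRuntime` type, downstream code can no longer assume the field is present. A minimal sketch of the consuming pattern this implies (the helper name and the fallback of 1 are hypothetical, not part of this commit):

// Hypothetical consumer of the now-optional field; `1` is an assumed default.
type RuntimeSpecFragment = { replicas?: number };

const effectiveReplicas = (spec: RuntimeSpecFragment, fallback = 1): number =>
  spec.replicas ?? fallback;

console.log(effectiveReplicas({ replicas: 3 })); // 3
console.log(effectiveReplicas({})); // 1 — KServe runtimes may omit replicas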
@@ -5,13 +5,17 @@ type MockResourceConfigType = {
   name?: string;
   namespace?: string;
   displayName?: string;
+  replicas?: number;
   platforms?: ServingRuntimePlatform[];
+  isModelmesh?: boolean;
 };
 
 export const mockServingRuntimeTemplateK8sResource = ({
   name = 'template-1',
   namespace = 'opendatahub',
   displayName = 'New OVMS Server',
+  replicas = 1,
+  isModelmesh = false,
   platforms,
 }: MockResourceConfigType): TemplateKind => ({
   apiVersion: 'template.openshift.io/v1',
@@ -76,7 +80,7 @@ export const mockServingRuntimeTemplateK8sResource = ({
         grpcEndpoint: 'port:8085',
         multiModel: true,
         protocolVersions: ['grpc-v1'],
-        replicas: 1,
+        ...(isModelmesh && { replicas }),
         supportedModelFormats: [
           {
             autoSelect: true,
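The mock now emits `replicas` only for ModelMesh via the conditional-spread idiom: spreading a falsy value is a no-op, while spreading an object copies its keys. A standalone sketch of the idiom (names are illustrative):

// Spreading a falsy value adds nothing; spreading an object adds its keys.
const buildSpec = (isModelmesh: boolean, replicas: number) => ({
  multiModel: isModelmesh,
  ...(isModelmesh && { replicas }),
});

console.log(buildSpec(true, 1)); // { multiModel: true, replicas: 1 }
console.log(buildSpec(false, 1)); // { multiModel: false } — no replicas key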
49 changes: 49 additions & 0 deletions frontend/src/api/__tests__/inferenceServices.spec.ts
@@ -105,4 +105,53 @@ describe('assembleInferenceService', () => {
     expect(inferenceService.spec.predictor.tolerations).toBeUndefined();
     expect(inferenceService.spec.predictor.model.resources).toBeUndefined();
   });
+
+  it('should provide max and min replicas if provided', async () => {
+    const replicaCount = 2;
+
+    const acceleratorProfileState: AcceleratorProfileState = {
+      acceleratorProfile: mockAcceleratorProfile({}),
+      acceleratorProfiles: [mockAcceleratorProfile({})],
+      initialAcceleratorProfile: mockAcceleratorProfile({}),
+      count: 1,
+      additionalOptions: {},
+      useExisting: false,
+    };
+
+    const inferenceService = assembleInferenceService(
+      mockInferenceServiceModalData({}),
+      undefined,
+      undefined,
+      true,
+      undefined,
+      acceleratorProfileState,
+      replicaCount,
+    );
+
+    expect(inferenceService.spec.predictor.maxReplicas).toBe(replicaCount);
+    expect(inferenceService.spec.predictor.minReplicas).toBe(replicaCount);
+  });
+
+  it('should omit replica count if not provided', async () => {
+    const acceleratorProfileState: AcceleratorProfileState = {
+      acceleratorProfile: mockAcceleratorProfile({}),
+      acceleratorProfiles: [mockAcceleratorProfile({})],
+      initialAcceleratorProfile: mockAcceleratorProfile({}),
+      count: 1,
+      additionalOptions: {},
+      useExisting: false,
+    };
+
+    const inferenceService = assembleInferenceService(
+      mockInferenceServiceModalData({}),
+      undefined,
+      undefined,
+      true,
+      undefined,
+      acceleratorProfileState,
+    );
+
+    expect(inferenceService.spec.predictor.maxReplicas).toBeUndefined();
+    expect(inferenceService.spec.predictor.minReplicas).toBeUndefined();
+  });
 });
36 changes: 36 additions & 0 deletions frontend/src/api/__tests__/servingRuntimes.spec.ts
@@ -134,4 +134,40 @@ describe('assembleServingRuntime', () => {
       servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu'],
     ).toBeUndefined();
   });
+
+  it('should have replica count on modelmesh', async () => {
+    const replicaCount = 2;
+    const servingRuntime = assembleServingRuntime(
+      mockServingRuntimeModalData({
+        externalRoute: true,
+        tokenAuth: true,
+        numReplicas: replicaCount,
+      }),
+      'test',
+      mockServingRuntimeK8sResource({ auth: false, route: false }),
+      true,
+      false,
+      undefined,
+      true,
+    );
+
+    expect(servingRuntime.spec.replicas).toBe(replicaCount);
+  });
+
+  it('should omit replica count when not modelmesh', async () => {
+    const servingRuntime = assembleServingRuntime(
+      mockServingRuntimeModalData({
+        externalRoute: true,
+        tokenAuth: true,
+      }),
+      'test',
+      mockServingRuntimeK8sResource({ auth: false, route: false }),
+      true,
+      false,
+      undefined,
+      false,
+    );
+
+    expect(servingRuntime.spec.replicas).toBeUndefined();
+  });
 });
7 changes: 7 additions & 0 deletions frontend/src/api/k8s/inferenceServices.ts
@@ -23,6 +23,7 @@ export const assembleInferenceService = (
   isModelMesh?: boolean,
   inferenceService?: InferenceServiceKind,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): InferenceServiceKind => {
   const { storage, format, servingRuntimeName, project } = data;
   const name = editName || translateDisplayNameForK8s(data.name);
@@ -71,6 +72,8 @@
     },
     spec: {
       predictor: {
+        ...(replicaCount && { minReplicas: replicaCount }),
+        ...(replicaCount && { maxReplicas: replicaCount }),
         model: {
           modelFormat: {
             name: format.name,
@@ -150,6 +153,7 @@ export const createInferenceService = (
   secretKey?: string,
   isModelMesh?: boolean,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceService = assembleInferenceService(
     data,
@@ -158,6 +162,7 @@
     isModelMesh,
     undefined,
     acceleratorState,
+    replicaCount,
   );
   return k8sCreateResource<InferenceServiceKind>({
     model: InferenceServiceModel,
@@ -171,6 +176,7 @@ export const updateInferenceService = (
   secretKey?: string,
   isModelMesh?: boolean,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceService = assembleInferenceService(
     data,
@@ -179,6 +185,7 @@
     isModelMesh,
     existingData,
     acceleratorState,
+    replicaCount,
   );
 
   return k8sUpdateResource<InferenceServiceKind>({
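One subtlety in `assembleInferenceService`: the guard `...(replicaCount && { minReplicas: replicaCount })` drops the field for `0` as well as `undefined`, since `0` is falsy. A sketch of the difference, should a scale-to-zero count ever need to survive (the explicit-check variant is a hypothetical alternative, not what the commit does):

const truthyGuard = (replicaCount?: number) => ({
  ...(replicaCount && { minReplicas: replicaCount }),
});

// Hypothetical alternative that preserves an explicit 0.
const definedGuard = (replicaCount?: number) => ({
  ...(replicaCount !== undefined && { minReplicas: replicaCount }),
});

console.log(truthyGuard(0)); // {} — a requested count of 0 is silently dropped
console.log(definedGuard(0)); // { minReplicas: 0 }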
6 changes: 5 additions & 1 deletion frontend/src/api/k8s/servingRuntimes.ts
@@ -87,7 +87,11 @@ export const assembleServingRuntime = (
       },
     };
   }
-  updatedServingRuntime.spec.replicas = numReplicas;
+
+  delete updatedServingRuntime.spec.replicas;
+  if (isModelMesh) {
+    updatedServingRuntime.spec.replicas = numReplicas;
+  }
 
   // Accelerator support
 
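The assembler now clears `replicas` before conditionally restoring it, so a KServe runtime built from an existing resource cannot carry a stale count forward. A minimal sketch of the delete-then-conditionally-set pattern, with the types abbreviated:

type SpecFragment = { replicas?: number };

const applyReplicas = (spec: SpecFragment, isModelMesh: boolean, numReplicas: number) => {
  delete spec.replicas; // drop any value copied from the source resource
  if (isModelMesh) {
    spec.replicas = numReplicas; // only ModelMesh scales via the runtime
  }
  return spec;
};

console.log(applyReplicas({ replicas: 5 }, false, 2)); // {} — KServe: field removed
console.log(applyReplicas({ replicas: 5 }, true, 2)); // { replicas: 2 }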
4 changes: 3 additions & 1 deletion frontend/src/k8sTypes.ts
@@ -340,7 +340,7 @@ export type ServingRuntimeKind = K8sResourceCommon & {
     };
     containers: ServingContainer[];
     supportedModelFormats: SupportedModelFormats[];
-    replicas: number;
+    replicas?: number;
     tolerations?: Toleration[];
     volumes?: Volume[];
   };
@@ -386,6 +386,8 @@ export type InferenceServiceKind = K8sResourceCommon & {
           schemaPath?: string;
         };
       };
+      maxReplicas?: number;
+      minReplicas?: number;
     };
   };
   status?: {
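With `minReplicas`/`maxReplicas` added to the predictor type, a KServe `InferenceService` assembled with a replica count of 2 carries both fields pinned to the same value, fixing the replica range rather than leaving it to autoscaler defaults. An abbreviated sketch of the resulting object (illustrative only; everything but the fields this commit touches is elided):

// Illustrative only — abbreviated to the fields this commit touches.
const inferenceService = {
  apiVersion: 'serving.kserve.io/v1beta1',
  kind: 'InferenceService',
  spec: {
    predictor: {
      minReplicas: 2, // both pinned to the requested count
      maxReplicas: 2,
      // model: { modelFormat, runtime, storage, ... }
    },
  },
};
console.log(inferenceService.spec.predictor);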
@@ -142,6 +142,8 @@ const ManageKServeModal: React.FC<ManageKServeModalProps> = ({
       editInfo?.inferenceServiceEditInfo?.spec.predictor.model.runtime ||
       translateDisplayNameForK8s(createDataInferenceService.name);
 
+    const replicaCount = createDataServingRuntime.numReplicas;
+
     Promise.all([
       submitServingRuntimeResources(
         servingRuntimeSelected,
@@ -162,6 +164,7 @@ const ManageKServeModal: React.FC<ManageKServeModalProps> = ({
         servingRuntimeName,
         false,
         acceleratorProfileState,
+        replicaCount,
       ),
     ])
       .then(() => onSuccess())
14 changes: 13 additions & 1 deletion frontend/src/pages/modelServing/screens/projects/utils.ts
@@ -256,6 +256,7 @@ const createInferenceServiceAndDataConnection = (
   editInfo?: InferenceServiceKind,
   isModelMesh?: boolean,
   acceleratorProfileState?: AcceleratorProfileState,
+  replicaCount?: number,
 ) => {
   if (!existingStorage) {
     return createAWSSecret(inferenceServiceData).then((secret) =>
@@ -266,12 +267,14 @@
           secret.metadata.name,
           isModelMesh,
           acceleratorProfileState,
+          replicaCount,
         )
       : createInferenceService(
           inferenceServiceData,
           secret.metadata.name,
           isModelMesh,
           acceleratorProfileState,
+          replicaCount,
         ),
     );
   }
@@ -282,8 +285,15 @@
         undefined,
         isModelMesh,
         acceleratorProfileState,
+        replicaCount,
       )
-    : createInferenceService(inferenceServiceData, undefined, isModelMesh, acceleratorProfileState);
+    : createInferenceService(
+        inferenceServiceData,
+        undefined,
+        isModelMesh,
+        acceleratorProfileState,
+        replicaCount,
+      );
 };
 
 export const submitInferenceServiceResource = (
@@ -292,6 +302,7 @@
   servingRuntimeName?: string,
   isModelMesh?: boolean,
   acceleratorProfileState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceServiceData = {
     ...createData,
@@ -309,6 +320,7 @@
     editInfo,
     isModelMesh,
     acceleratorProfileState,
+    replicaCount,
   );
 };
 