Fix the replica count for kserve

lucferbux committed Jan 9, 2024
1 parent c9f219a · commit a77d76f

Showing 9 changed files with 122 additions and 5 deletions.
2 changes: 1 addition & 1 deletion backend/src/types.ts
@@ -892,7 +892,7 @@ export type ServingRuntime = K8sResourceCommon & {
       volumeMounts?: VolumeMount[];
     }[];
     supportedModelFormats: SupportedModelFormats[];
-    replicas: number;
+    replicas?: number;
     volumes?: Volume[];
   };
 };
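With `replicas` now optional on the backend `ServingRuntime` type, downstream code can no longer assume the field is present. A minimal sketch of the consuming pattern this implies (the helper name and the fallback of 1 are hypothetical, not part of this commit):

// Hypothetical consumer of the now-optional field; `1` is an assumed default.
type RuntimeSpecFragment = { replicas?: number };

const effectiveReplicas = (spec: RuntimeSpecFragment, fallback = 1): number =>
  spec.replicas ?? fallback;

console.log(effectiveReplicas({ replicas: 3 })); // 3
console.log(effectiveReplicas({})); // 1 — KServe runtimes may omit replicas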
@@ -5,13 +5,17 @@ type MockResourceConfigType = {
   name?: string;
   namespace?: string;
   displayName?: string;
+  replicas?: number;
   platforms?: ServingRuntimePlatform[];
+  isModelmesh?: boolean;
 };
 
 export const mockServingRuntimeTemplateK8sResource = ({
   name = 'template-1',
   namespace = 'opendatahub',
   displayName = 'New OVMS Server',
+  replicas = 1,
+  isModelmesh = false,
   platforms,
 }: MockResourceConfigType): TemplateKind => ({
   apiVersion: 'template.openshift.io/v1',
@@ -76,7 +80,7 @@ export const mockServingRuntimeTemplateK8sResource = ({
         grpcEndpoint: 'port:8085',
         multiModel: true,
         protocolVersions: ['grpc-v1'],
-        replicas: 1,
+        ...(isModelmesh && { replicas }),
         supportedModelFormats: [
           {
             autoSelect: true,
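The mock now emits `replicas` only for ModelMesh via the conditional-spread idiom: spreading a falsy value is a no-op, while spreading an object copies its keys. A standalone sketch of the idiom (names are illustrative):

// Spreading a falsy value adds nothing; spreading an object adds its keys.
const buildSpec = (isModelmesh: boolean, replicas: number) => ({
  multiModel: isModelmesh,
  ...(isModelmesh && { replicas }),
});

console.log(buildSpec(true, 1)); // { multiModel: true, replicas: 1 }
console.log(buildSpec(false, 1)); // { multiModel: false } — no replicas key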
49 changes: 49 additions & 0 deletions frontend/src/api/__tests__/inferenceServices.spec.ts
@@ -105,4 +105,53 @@ describe('assembleInferenceService', () => {
     expect(inferenceService.spec.predictor.tolerations).toBeUndefined();
     expect(inferenceService.spec.predictor.model.resources).toBeUndefined();
   });
+
+  it('should provide max and min replicas if provided', async () => {
+    const replicaCount = 2;
+
+    const acceleratorProfileState: AcceleratorProfileState = {
+      acceleratorProfile: mockAcceleratorProfile({}),
+      acceleratorProfiles: [mockAcceleratorProfile({})],
+      initialAcceleratorProfile: mockAcceleratorProfile({}),
+      count: 1,
+      additionalOptions: {},
+      useExisting: false,
+    };
+
+    const inferenceService = assembleInferenceService(
+      mockInferenceServiceModalData({}),
+      undefined,
+      undefined,
+      true,
+      undefined,
+      acceleratorProfileState,
+      replicaCount,
+    );
+
+    expect(inferenceService.spec.predictor.maxReplicas).toBe(replicaCount);
+    expect(inferenceService.spec.predictor.minReplicas).toBe(replicaCount);
+  });
+
+  it('should omit replica count if not provided', async () => {
+    const acceleratorProfileState: AcceleratorProfileState = {
+      acceleratorProfile: mockAcceleratorProfile({}),
+      acceleratorProfiles: [mockAcceleratorProfile({})],
+      initialAcceleratorProfile: mockAcceleratorProfile({}),
+      count: 1,
+      additionalOptions: {},
+      useExisting: false,
+    };
+
+    const inferenceService = assembleInferenceService(
+      mockInferenceServiceModalData({}),
+      undefined,
+      undefined,
+      true,
+      undefined,
+      acceleratorProfileState,
+    );
+
+    expect(inferenceService.spec.predictor.maxReplicas).toBeUndefined();
+    expect(inferenceService.spec.predictor.minReplicas).toBeUndefined();
+  });
 });
36 changes: 36 additions & 0 deletions frontend/src/api/__tests__/servingRuntimes.spec.ts
@@ -134,4 +134,40 @@ describe('assembleServingRuntime', () => {
       servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu'],
     ).toBeUndefined();
   });
+
+  it('should have replica count on modelmesh', async () => {
+    const replicaCount = 2;
+    const servingRuntime = assembleServingRuntime(
+      mockServingRuntimeModalData({
+        externalRoute: true,
+        tokenAuth: true,
+        numReplicas: replicaCount,
+      }),
+      'test',
+      mockServingRuntimeK8sResource({ auth: false, route: false }),
+      true,
+      false,
+      undefined,
+      true,
+    );
+
+    expect(servingRuntime.spec.replicas).toBe(replicaCount);
+  });
+
+  it('should omit replica count when not modelmesh', async () => {
+    const servingRuntime = assembleServingRuntime(
+      mockServingRuntimeModalData({
+        externalRoute: true,
+        tokenAuth: true,
+      }),
+      'test',
+      mockServingRuntimeK8sResource({ auth: false, route: false }),
+      true,
+      false,
+      undefined,
+      false,
+    );
+
+    expect(servingRuntime.spec.replicas).toBeUndefined();
+  });
 });
7 changes: 7 additions & 0 deletions frontend/src/api/k8s/inferenceServices.ts
@@ -23,6 +23,7 @@ export const assembleInferenceService = (
   isModelMesh?: boolean,
   inferenceService?: InferenceServiceKind,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): InferenceServiceKind => {
   const { storage, format, servingRuntimeName, project } = data;
   const name = editName || translateDisplayNameForK8s(data.name);
@@ -71,6 +72,8 @@
     },
     spec: {
       predictor: {
+        ...(replicaCount && { minReplicas: replicaCount }),
+        ...(replicaCount && { maxReplicas: replicaCount }),
         model: {
           modelFormat: {
             name: format.name,
@@ -150,6 +153,7 @@ export const createInferenceService = (
   secretKey?: string,
   isModelMesh?: boolean,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceService = assembleInferenceService(
     data,
@@ -158,6 +162,7 @@
     isModelMesh,
     undefined,
     acceleratorState,
+    replicaCount,
   );
   return k8sCreateResource<InferenceServiceKind>({
     model: InferenceServiceModel,
@@ -171,6 +176,7 @@ export const updateInferenceService = (
   secretKey?: string,
   isModelMesh?: boolean,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceService = assembleInferenceService(
     data,
@@ -179,6 +185,7 @@
     isModelMesh,
     existingData,
     acceleratorState,
+    replicaCount,
   );
 
   return k8sUpdateResource<InferenceServiceKind>({
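One subtlety in `assembleInferenceService`: the guard `...(replicaCount && { minReplicas: replicaCount })` drops the field for `0` as well as `undefined`, since `0` is falsy. A sketch of the difference, should a scale-to-zero count ever need to survive (the explicit-check variant is a hypothetical alternative, not what the commit does):

const truthyGuard = (replicaCount?: number) => ({
  ...(replicaCount && { minReplicas: replicaCount }),
});

// Hypothetical alternative that preserves an explicit 0.
const definedGuard = (replicaCount?: number) => ({
  ...(replicaCount !== undefined && { minReplicas: replicaCount }),
});

console.log(truthyGuard(0)); // {} — a requested count of 0 is silently dropped
console.log(definedGuard(0)); // { minReplicas: 0 }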
6 changes: 5 additions & 1 deletion frontend/src/api/k8s/servingRuntimes.ts
@@ -87,7 +87,11 @@ export const assembleServingRuntime = (
       },
     };
   }
-  updatedServingRuntime.spec.replicas = numReplicas;
+
+  delete updatedServingRuntime.spec.replicas;
+  if (isModelMesh) {
+    updatedServingRuntime.spec.replicas = numReplicas;
+  }
 
   // Accelerator support
 
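The assembler now clears `replicas` before conditionally restoring it, so a KServe runtime built from an existing resource cannot carry a stale count forward. A minimal sketch of the delete-then-conditionally-set pattern, with the types abbreviated:

type SpecFragment = { replicas?: number };

const applyReplicas = (spec: SpecFragment, isModelMesh: boolean, numReplicas: number) => {
  delete spec.replicas; // drop any value copied from the source resource
  if (isModelMesh) {
    spec.replicas = numReplicas; // only ModelMesh scales via the runtime
  }
  return spec;
};

console.log(applyReplicas({ replicas: 5 }, false, 2)); // {} — KServe: field removed
console.log(applyReplicas({ replicas: 5 }, true, 2)); // { replicas: 2 }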
4 changes: 3 additions & 1 deletion frontend/src/k8sTypes.ts
@@ -340,7 +340,7 @@ export type ServingRuntimeKind = K8sResourceCommon & {
     };
     containers: ServingContainer[];
     supportedModelFormats: SupportedModelFormats[];
-    replicas: number;
+    replicas?: number;
     tolerations?: Toleration[];
     volumes?: Volume[];
   };
@@ -386,6 +386,8 @@ export type InferenceServiceKind = K8sResourceCommon & {
           schemaPath?: string;
         };
       };
+      maxReplicas?: number;
+      minReplicas?: number;
     };
   };
   status?: {
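With `minReplicas`/`maxReplicas` added to the predictor type, a KServe `InferenceService` assembled with a replica count of 2 carries both fields pinned to the same value, fixing the replica range rather than leaving it to autoscaler defaults. An abbreviated sketch of the resulting object (illustrative only; everything but the fields this commit touches is elided):

// Illustrative only — abbreviated to the fields this commit touches.
const inferenceService = {
  apiVersion: 'serving.kserve.io/v1beta1',
  kind: 'InferenceService',
  spec: {
    predictor: {
      minReplicas: 2, // both pinned to the requested count
      maxReplicas: 2,
      // model: { modelFormat, runtime, storage, ... }
    },
  },
};
console.log(inferenceService.spec.predictor);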
@@ -142,6 +142,8 @@ const ManageKServeModal: React.FC<ManageKServeModalProps> = ({
       editInfo?.inferenceServiceEditInfo?.spec.predictor.model.runtime ||
       translateDisplayNameForK8s(createDataInferenceService.name);
 
+    const replicaCount = createDataServingRuntime.numReplicas;
+
     Promise.all([
       submitServingRuntimeResources(
         servingRuntimeSelected,
@@ -162,6 +164,7 @@ const ManageKServeModal: React.FC<ManageKServeModalProps> = ({
         servingRuntimeName,
         false,
         acceleratorProfileState,
+        replicaCount,
       ),
     ])
       .then(() => onSuccess())
14 changes: 13 additions & 1 deletion frontend/src/pages/modelServing/screens/projects/utils.ts
@@ -256,6 +256,7 @@ const createInferenceServiceAndDataConnection = (
   editInfo?: InferenceServiceKind,
   isModelMesh?: boolean,
   acceleratorProfileState?: AcceleratorProfileState,
+  replicaCount?: number,
 ) => {
   if (!existingStorage) {
     return createAWSSecret(inferenceServiceData).then((secret) =>
@@ -266,12 +267,14 @@
           secret.metadata.name,
           isModelMesh,
           acceleratorProfileState,
+          replicaCount,
         )
       : createInferenceService(
           inferenceServiceData,
           secret.metadata.name,
           isModelMesh,
           acceleratorProfileState,
+          replicaCount,
         ),
     );
   }
@@ -282,8 +285,15 @@
         undefined,
         isModelMesh,
         acceleratorProfileState,
+        replicaCount,
       )
-    : createInferenceService(inferenceServiceData, undefined, isModelMesh, acceleratorProfileState);
+    : createInferenceService(
+        inferenceServiceData,
+        undefined,
+        isModelMesh,
+        acceleratorProfileState,
+        replicaCount,
+      );
 };
 
 export const submitInferenceServiceResource = (
@@ -292,6 +302,7 @@
   servingRuntimeName?: string,
   isModelMesh?: boolean,
   acceleratorProfileState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceServiceData = {
     ...createData,
@@ -309,6 +320,7 @@
     editInfo,
     isModelMesh,
     acceleratorProfileState,
+    replicaCount,
   );
 };
 