Fix the replica count for kserve #2331

Merged · 1 commit · Jan 18, 2024

backend/src/types.ts (1 addition, 1 deletion)
@@ -893,7 +893,7 @@ export type ServingRuntime = K8sResourceCommon & {
       volumeMounts?: VolumeMount[];
     }[];
     supportedModelFormats: SupportedModelFormats[];
-    replicas: number;
+    replicas?: number;
     volumes?: Volume[];
   };
 };

@@ -5,13 +5,17 @@ type MockResourceConfigType = {
   name?: string;
   namespace?: string;
   displayName?: string;
+  replicas?: number;
   platforms?: ServingRuntimePlatform[];
+  isModelmesh?: boolean;
 };

 export const mockServingRuntimeTemplateK8sResource = ({
   name = 'template-1',
   namespace = 'opendatahub',
   displayName = 'New OVMS Server',
+  replicas = 1,
+  isModelmesh = false,
   platforms,
 }: MockResourceConfigType): TemplateKind => ({
   apiVersion: 'template.openshift.io/v1',
@@ -76,7 +80,7 @@ export const mockServingRuntimeTemplateK8sResource = ({
       grpcEndpoint: 'port:8085',
       multiModel: true,
       protocolVersions: ['grpc-v1'],
-      replicas: 1,
+      ...(isModelmesh && { replicas }),
       supportedModelFormats: [
         {
           autoSelect: true,
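
The mock now mirrors the production conditional: `replicas` appears only for modelmesh. As a minimal standalone sketch of that spread idiom (the `Spec` type and `buildSpec` helper are invented for illustration, not from the PR), spreading a falsy value into an object literal contributes nothing, so the key is omitted entirely:

```ts
// Illustrative sketch of the conditional-spread pattern used above.
type Spec = { multiModel: boolean; replicas?: number };

const buildSpec = (isModelmesh: boolean, replicas: number): Spec => ({
  multiModel: isModelmesh,
  // `false && {...}` evaluates to `false`, and spreading a boolean
  // adds no properties, so `replicas` is only present for modelmesh.
  ...(isModelmesh && { replicas }),
});

console.log(buildSpec(true, 2)); // { multiModel: true, replicas: 2 }
console.log(buildSpec(false, 2)); // { multiModel: false } — no replicas key
```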

frontend/src/api/__tests__/inferenceServices.spec.ts (49 additions, 0 deletions)
@@ -105,4 +105,53 @@ describe('assembleInferenceService', () => {
     expect(inferenceService.spec.predictor.tolerations).toBeUndefined();
     expect(inferenceService.spec.predictor.model.resources).toBeUndefined();
   });
+
+  it('should provide max and min replicas if provided', async () => {
+    const replicaCount = 2;
+
+    const acceleratorProfileState: AcceleratorProfileState = {
+      acceleratorProfile: mockAcceleratorProfile({}),
+      acceleratorProfiles: [mockAcceleratorProfile({})],
+      initialAcceleratorProfile: mockAcceleratorProfile({}),
+      count: 1,
+      additionalOptions: {},
+      useExisting: false,
+    };
+
+    const inferenceService = assembleInferenceService(
+      mockInferenceServiceModalData({}),
+      undefined,
+      undefined,
+      true,
+      undefined,
+      acceleratorProfileState,
+      replicaCount,
+    );
+
+    expect(inferenceService.spec.predictor.maxReplicas).toBe(replicaCount);
+    expect(inferenceService.spec.predictor.minReplicas).toBe(replicaCount);
+  });
+
+  it('should omit replica count if not provided', async () => {
+    const acceleratorProfileState: AcceleratorProfileState = {
+      acceleratorProfile: mockAcceleratorProfile({}),
+      acceleratorProfiles: [mockAcceleratorProfile({})],
+      initialAcceleratorProfile: mockAcceleratorProfile({}),
+      count: 1,
+      additionalOptions: {},
+      useExisting: false,
+    };
+
+    const inferenceService = assembleInferenceService(
+      mockInferenceServiceModalData({}),
+      undefined,
+      undefined,
+      true,
+      undefined,
+      acceleratorProfileState,
+    );
+
+    expect(inferenceService.spec.predictor.maxReplicas).toBeUndefined();
+    expect(inferenceService.spec.predictor.minReplicas).toBeUndefined();
+  });
 });

frontend/src/api/__tests__/servingRuntimes.spec.ts (36 additions, 0 deletions)
@@ -134,4 +134,40 @@ describe('assembleServingRuntime', () => {
       servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu'],
     ).toBeUndefined();
   });
+
+  it('should have replica count on modelmesh', async () => {
+    const replicaCount = 2;
+    const servingRuntime = assembleServingRuntime(
+      mockServingRuntimeModalData({
+        externalRoute: true,
+        tokenAuth: true,
+        numReplicas: replicaCount,
+      }),
+      'test',
+      mockServingRuntimeK8sResource({ auth: false, route: false }),
+      true,
+      false,
+      undefined,
+      true,
+    );
+
+    expect(servingRuntime.spec.replicas).toBe(replicaCount);
+  });
+
+  it('should not have replica count on kserve', async () => {
+    const servingRuntime = assembleServingRuntime(
+      mockServingRuntimeModalData({
+        externalRoute: true,
+        tokenAuth: true,
+      }),
+      'test',
+      mockServingRuntimeK8sResource({ auth: false, route: false }),
+      true,
+      false,
+      undefined,
+      false,
+    );
+
+    expect(servingRuntime.spec.replicas).toBeUndefined();
+  });
 });

frontend/src/api/k8s/inferenceServices.ts (7 additions, 0 deletions)
@@ -23,6 +23,7 @@ export const assembleInferenceService = (
   isModelMesh?: boolean,
   inferenceService?: InferenceServiceKind,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): InferenceServiceKind => {
   const { storage, format, servingRuntimeName, project } = data;
   const name = editName || translateDisplayNameForK8s(data.name);
@@ -70,6 +71,8 @@
     },
     spec: {
       predictor: {
+        ...(replicaCount && { minReplicas: replicaCount }),
+        ...(replicaCount && { maxReplicas: replicaCount }),
         model: {
           modelFormat: {
             name: format.name,
@@ -149,6 +152,7 @@ export const createInferenceService = (
   secretKey?: string,
   isModelMesh?: boolean,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceService = assembleInferenceService(
     data,
@@ -157,6 +161,7 @@
     isModelMesh,
     undefined,
     acceleratorState,
+    replicaCount,
   );
   return k8sCreateResource<InferenceServiceKind>({
     model: InferenceServiceModel,
@@ -170,6 +175,7 @@ export const updateInferenceService = (
   secretKey?: string,
   isModelMesh?: boolean,
   acceleratorState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceService = assembleInferenceService(
     data,
@@ -178,6 +184,7 @@
     isModelMesh,
     existingData,
     acceleratorState,
+    replicaCount,
   );

   return k8sUpdateResource<InferenceServiceKind>({
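
One subtlety in the pattern above, worth noting for reviewers: the guard is truthiness-based, so a `replicaCount` of `0` is dropped exactly like `undefined`. A minimal standalone sketch of that behavior (the `Predictor` type and `makePredictor` helper are hypothetical, isolating only the spread logic):

```ts
// Sketch of the truthiness guard used in assembleInferenceService.
type Predictor = { minReplicas?: number; maxReplicas?: number; model: string };

const makePredictor = (replicaCount?: number): Predictor => ({
  ...(replicaCount && { minReplicas: replicaCount }),
  ...(replicaCount && { maxReplicas: replicaCount }),
  model: 'example-model',
});

console.log(makePredictor(2)); // { minReplicas: 2, maxReplicas: 2, model: 'example-model' }
console.log(makePredictor()); // { model: 'example-model' }
console.log(makePredictor(0)); // { model: 'example-model' } — 0 is falsy, so it is omitted too
```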

frontend/src/api/k8s/servingRuntimes.ts (5 additions, 1 deletion)
@@ -86,7 +86,11 @@ export const assembleServingRuntime = (
       },
     };
   }
-  updatedServingRuntime.spec.replicas = numReplicas;
+
+  delete updatedServingRuntime.spec.replicas;
+  if (isModelMesh) {
+    updatedServingRuntime.spec.replicas = numReplicas;
+  }

   // Accelerator support

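
Deleting before the conditional set matters in the edit path: `updatedServingRuntime` starts from an existing resource, which may still carry a `replicas` value that must not survive on a KServe runtime. A sketch under that assumption (the `ServingRuntimeSpec` shape is abbreviated and `setReplicas` is a hypothetical helper, not PR code):

```ts
// Abbreviated sketch of the delete-then-conditionally-set pattern.
type ServingRuntimeSpec = { replicas?: number; containers: unknown[] };

const setReplicas = (
  spec: ServingRuntimeSpec,
  isModelMesh: boolean,
  numReplicas: number,
): ServingRuntimeSpec => {
  // Drop any stale value copied from the existing resource first...
  delete spec.replicas;
  // ...then re-add it only for modelmesh, which scales via the runtime.
  if (isModelMesh) {
    spec.replicas = numReplicas;
  }
  return spec;
};

// A KServe edit clears a previously set count instead of keeping it:
console.log(setReplicas({ replicas: 3, containers: [] }, false, 2)); // { containers: [] }
console.log(setReplicas({ replicas: 3, containers: [] }, true, 2)); // { replicas: 2, containers: [] }
```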

frontend/src/k8sTypes.ts (3 additions, 1 deletion)
@@ -341,7 +341,7 @@ export type ServingRuntimeKind = K8sResourceCommon & {
     };
     containers: ServingContainer[];
     supportedModelFormats: SupportedModelFormats[];
-    replicas: number;
+    replicas?: number;
     tolerations?: Toleration[];
     volumes?: Volume[];
   };
@@ -387,6 +387,8 @@ export type InferenceServiceKind = K8sResourceCommon & {
           schemaPath?: string;
         };
       };
+      maxReplicas?: number;
+      minReplicas?: number;
     };
   };
   status?: {
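
The type changes encode the PR's core distinction: modelmesh scales through `ServingRuntime.spec.replicas`, while KServe scales through `minReplicas`/`maxReplicas` on the InferenceService predictor. A hedged sketch of the two resulting objects (field values are invented for illustration; other required fields are elided):

```ts
// Invented example objects showing where the replica knobs live after this PR.

// modelmesh: the replica count sits on the ServingRuntime itself.
const modelmeshRuntime = {
  apiVersion: 'serving.kserve.io/v1alpha1',
  kind: 'ServingRuntime',
  spec: { replicas: 2 /* , containers, supportedModelFormats, ... */ },
};

// KServe: the runtime carries no replicas; the InferenceService predictor
// carries minReplicas/maxReplicas (pinned to the same value by the UI).
const kserveInferenceService = {
  apiVersion: 'serving.kserve.io/v1beta1',
  kind: 'InferenceService',
  spec: { predictor: { minReplicas: 2, maxReplicas: 2 /* , model, ... */ } },
};
```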

@@ -145,6 +145,8 @@ const ManageKServeModal: React.FC<ManageKServeModalProps> = ({
       editInfo?.inferenceServiceEditInfo?.spec.predictor.model.runtime ||
       translateDisplayNameForK8s(createDataInferenceService.name);

+    const replicaCount = createDataServingRuntime.numReplicas;
+
     submitServingRuntimeResources(
       servingRuntimeSelected,
       createDataServingRuntime,
@@ -165,6 +167,7 @@
         servingRuntimeName,
         false,
         acceleratorProfileState,
+        replicaCount,
       ),
     )
       .then(() => onSuccess())
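
In `ManageKServeModal`, the form keeps the count on the serving-runtime data (`numReplicas`), but the KServe path hands it to the InferenceService rather than the runtime; pinning `minReplicas` and `maxReplicas` to the same value effectively fixes the autoscaler at that count. A hypothetical sketch of that mapping (types abbreviated, `toPredictorReplicas` is not PR code):

```ts
// Hypothetical helper illustrating the modal's mapping.
type ServingRuntimeFormData = { numReplicas: number /* , ... */ };

// The PR treats ServingRuntime.spec.replicas as modelmesh-only; for KServe
// the form's count becomes a fixed min/max window on the predictor instead.
const toPredictorReplicas = (form: ServingRuntimeFormData) => ({
  minReplicas: form.numReplicas,
  maxReplicas: form.numReplicas,
});

console.log(toPredictorReplicas({ numReplicas: 3 })); // { minReplicas: 3, maxReplicas: 3 }
```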

frontend/src/pages/modelServing/screens/projects/utils.ts (13 additions, 1 deletion)
@@ -258,6 +258,7 @@ const createInferenceServiceAndDataConnection = (
   editInfo?: InferenceServiceKind,
   isModelMesh?: boolean,
   acceleratorProfileState?: AcceleratorProfileState,
+  replicaCount?: number,
 ) => {
   if (!existingStorage) {
     return createAWSSecret(inferenceServiceData).then((secret) =>
@@ -268,12 +269,14 @@
           secret.metadata.name,
           isModelMesh,
           acceleratorProfileState,
+          replicaCount,
         )
       : createInferenceService(
           inferenceServiceData,
           secret.metadata.name,
           isModelMesh,
           acceleratorProfileState,
+          replicaCount,
         ),
     );
   }
@@ -284,8 +287,15 @@
       undefined,
       isModelMesh,
       acceleratorProfileState,
+      replicaCount,
     )
-    : createInferenceService(inferenceServiceData, undefined, isModelMesh, acceleratorProfileState);
+    : createInferenceService(
+        inferenceServiceData,
+        undefined,
+        isModelMesh,
+        acceleratorProfileState,
+        replicaCount,
+      );
 };

 export const submitInferenceServiceResource = (
@@ -294,6 +304,7 @@
   servingRuntimeName?: string,
   isModelMesh?: boolean,
   acceleratorProfileState?: AcceleratorProfileState,
+  replicaCount?: number,
 ): Promise<InferenceServiceKind> => {
   const inferenceServiceData = {
     ...createData,
@@ -311,6 +322,7 @@
     editInfo,
     isModelMesh,
     acceleratorProfileState,
+    replicaCount,
   );
 };