Skip to content

Commit

Permalink
Modify Accelerator support for kserve
Browse files Browse the repository at this point in the history
  • Loading branch information
lucferbux committed Dec 1, 2023
1 parent b1bcc05 commit 00c677c
Show file tree
Hide file tree
Showing 16 changed files with 411 additions and 130 deletions.
2 changes: 1 addition & 1 deletion backend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ export type ServingRuntime = K8sResourceCommon & {
args: string[];
image: string;
name: string;
resources: ContainerResources;
resources?: ContainerResources;
volumeMounts?: VolumeMount[];
}[];
supportedModelFormats: SupportedModelFormats[];
Expand Down
47 changes: 47 additions & 0 deletions frontend/src/__mocks__/mockAcceleratork8sResource.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { AcceleratorKind } from '~/k8sTypes';
import { genUID } from './mockUtils';

// Optional overrides accepted by mockAcceleratork8sResource; any field left
// out falls back to the Nvidia GPU defaults declared in the factory below.
type MockResourceConfigType = {
  name?: string;
  namespace?: string;
  displayName?: string;
  identifier?: string;
  enabled?: boolean;
  // Kubernetes-style tolerations, copied verbatim into spec.tolerations.
  tolerations?: {
    key: string;
    operator: string;
    effect: string;
  }[];
};

/**
 * Builds an AcceleratorProfile custom resource for use in tests.
 * Every field omitted from `config` falls back to an Nvidia GPU profile
 * (identifier `nvidia.com/gpu` with a matching NoSchedule toleration).
 */
export const mockAcceleratork8sResource = (config: MockResourceConfigType): AcceleratorKind => {
  const {
    name = 'migrated-gpu',
    namespace = 'test-project',
    displayName = 'Nvidia GPU',
    identifier = 'nvidia.com/gpu',
    enabled = true,
    tolerations = [{ key: 'nvidia.com/gpu', operator: 'Exists', effect: 'NoSchedule' }],
  } = config;

  return {
    apiVersion: 'dashboard.opendatahub.io/v1',
    kind: 'AcceleratorProfile',
    metadata: {
      creationTimestamp: '2023-03-17T16:12:41Z',
      generation: 1,
      name,
      namespace,
      resourceVersion: '1309350',
      uid: genUID('service'),
    },
    spec: { identifier, displayName, enabled, tolerations },
  };
};
28 changes: 28 additions & 0 deletions frontend/src/__mocks__/mockInferenceServiceModalData.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import {
CreatingInferenceServiceObject,
InferenceServiceStorageType,
} from '~/pages/modelServing/screens/types';

type MockResourceConfigType = Partial<CreatingInferenceServiceObject>;

/**
 * Builds a CreatingInferenceServiceObject (deploy-model modal form data) for
 * tests, defaulting to a caikit example service backed by new storage.
 */
export const mockInferenceServiceModalData = (
  config: MockResourceConfigType,
): CreatingInferenceServiceObject => {
  const {
    name = 'my-inference-service',
    project = 'caikit-example',
    servingRuntimeName = 'caikit',
    storage = {
      type: InferenceServiceStorageType.NEW_STORAGE,
      path: '/caikit-llama',
      dataConnection: 'aws-data-connection',
      awsData: [],
    },
    format = { name: 'caikit', version: '1.0.0' },
  } = config;

  return { name, project, servingRuntimeName, storage, format };
};
33 changes: 33 additions & 0 deletions frontend/src/__mocks__/mockServingRuntimeModalData.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types';

type MockResourceConfigType = Partial<CreatingServingRuntimeObject>;

/**
 * Builds a CreatingServingRuntimeObject (serving-runtime modal form data) for
 * tests; unspecified fields fall back to a small, single-replica caikit
 * runtime with auth and external routing disabled.
 */
export const mockServingRuntimeModalData = (
  config: MockResourceConfigType,
): CreatingServingRuntimeObject => {
  const {
    name = 'my-inference-service',
    servingRuntimeTemplateName = 'caikit',
    numReplicas = 1,
    modelSize = {
      name: 'small',
      resources: {
        requests: { cpu: '1', memory: '1Gi' },
        limits: { cpu: '1', memory: '1Gi' },
      },
    },
    externalRoute = false,
    tokenAuth = false,
    tokens = [],
  } = config;

  return {
    name,
    servingRuntimeTemplateName,
    numReplicas,
    modelSize,
    externalRoute,
    tokenAuth,
    tokens,
  };
};
99 changes: 68 additions & 31 deletions frontend/src/api/__tests__/inferenceServices.spec.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,14 @@
import { mockAcceleratork8sResource } from '~/__mocks__/mockAcceleratork8sResource';
import { mockInferenceServiceModalData } from '~/__mocks__/mockInferenceServiceModalData';
import { assembleInferenceService } from '~/api/k8s/inferenceServices';
import { InferenceServiceStorageType } from '~/pages/modelServing/screens/types';
import { translateDisplayNameForK8s } from '~/pages/projects/utils';
import { AcceleratorState } from '~/utilities/useAcceleratorState';

global.structuredClone = (val: unknown) => JSON.parse(JSON.stringify(val));

describe('assembleInferenceService', () => {
it('should have the right annotations when creating for Kserve', async () => {
const inferenceService = assembleInferenceService({
name: 'my-inference-service',
project: 'caikit-example',
servingRuntimeName: 'caikit',
storage: {
type: InferenceServiceStorageType.NEW_STORAGE,
path: '/caikit-llama',
dataConnection: 'aws-data-connection',
awsData: [],
},
format: {
name: 'caikit',
version: '1.0.0',
},
});
const inferenceService = assembleInferenceService(mockInferenceServiceModalData({}));

expect(inferenceService.metadata.annotations).toBeDefined();
expect(inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe(
Expand All @@ -36,21 +25,7 @@ describe('assembleInferenceService', () => {

it('should have the right annotations when creating for modelmesh', async () => {
const inferenceService = assembleInferenceService(
{
name: 'my-inference-service',
project: 'caikit-example',
servingRuntimeName: 'caikit',
storage: {
type: InferenceServiceStorageType.NEW_STORAGE,
path: '/caikit-llama',
dataConnection: 'aws-data-connection',
awsData: [],
},
format: {
name: 'caikit',
version: '1.0.0',
},
},
mockInferenceServiceModalData({}),
undefined,
undefined,
true,
Expand All @@ -68,4 +43,66 @@ describe('assembleInferenceService', () => {
undefined,
);
});

it('should handle name and display name', async () => {
const displayName = 'Llama model';

const inferenceService = assembleInferenceService(
mockInferenceServiceModalData({ name: displayName }),
);

expect(inferenceService.metadata.annotations).toBeDefined();
expect(inferenceService.metadata.annotations?.['openshift.io/display-name']).toBe(displayName);
expect(inferenceService.metadata.name).toBe(translateDisplayNameForK8s(displayName));
});

it('should add accelerator if kserve and accelerator found', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const inferenceService = assembleInferenceService(
mockInferenceServiceModalData({}),
undefined,
undefined,
false,
undefined,
acceleratorState,
);

expect(inferenceService.spec.predictor.tolerations).toBeDefined();
expect(inferenceService.spec.predictor.tolerations?.[0].key).toBe(
mockAcceleratork8sResource({}).spec.tolerations?.[0].key,
);
expect(inferenceService.spec.predictor.model.resources?.limits?.['nvidia.com/gpu']).toBe(1);
expect(inferenceService.spec.predictor.model.resources?.requests?.['nvidia.com/gpu']).toBe(1);
});

it('should not add accelerator if modelmesh and accelerator found', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const inferenceService = assembleInferenceService(
mockInferenceServiceModalData({}),
undefined,
undefined,
true,
undefined,
acceleratorState,
);

expect(inferenceService.spec.predictor.tolerations).toBeUndefined();
expect(inferenceService.spec.predictor.model.resources).toBeUndefined();
});
});
101 changes: 69 additions & 32 deletions frontend/src/api/__tests__/servingRuntimes.spec.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
import { mockAcceleratork8sResource } from '~/__mocks__/mockAcceleratork8sResource';
import { mockServingRuntimeK8sResource } from '~/__mocks__/mockServingRuntimeK8sResource';
import { mockServingRuntimeModalData } from '~/__mocks__/mockServingRuntimeModalData';
import { mockServingRuntimeTemplateK8sResource } from '~/__mocks__/mockServingRuntimeTemplateK8sResource';
import { assembleServingRuntime } from '~/api/k8s/servingRuntimes';
import { ServingRuntimeKind } from '~/k8sTypes';
import { AcceleratorState } from '~/utilities/useAcceleratorState';

global.structuredClone = (val: unknown) => JSON.parse(JSON.stringify(val));

describe('assembleServingRuntime', () => {
it('should omit enable-xxxx annotations when creating', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test false values
mockServingRuntimeModalData({
externalRoute: false,
tokenAuth: false,
},
}),
'test',
mockServingRuntimeTemplateK8sResource({}).objects[0] as ServingRuntimeKind,
false,
Expand All @@ -31,16 +28,10 @@ describe('assembleServingRuntime', () => {

it('should remove enable-xxxx annotations when editing', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test false values
mockServingRuntimeModalData({
externalRoute: false,
tokenAuth: false,
},
}),
'test',
mockServingRuntimeK8sResource({ auth: true, route: true }),
false,
Expand All @@ -54,16 +45,10 @@ describe('assembleServingRuntime', () => {

it('should add enable-xxxx annotations when creating', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test true values
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
},
}),
'test',
mockServingRuntimeTemplateK8sResource({}).objects[0] as ServingRuntimeKind,
false,
Expand All @@ -77,16 +62,10 @@ describe('assembleServingRuntime', () => {

it('should add enable-xxxx annotations when editing', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test true values
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
},
}),
'test',
mockServingRuntimeK8sResource({ auth: false, route: false }),
false,
Expand All @@ -97,4 +76,62 @@ describe('assembleServingRuntime', () => {
expect(servingRuntime.metadata.annotations?.['enable-auth']).toBe('true');
expect(servingRuntime.metadata.annotations?.['enable-route']).toBe('true');
});

it('should add tolerations and gpu on modelmesh', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const servingRuntime = assembleServingRuntime(
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
}),
'test',
mockServingRuntimeK8sResource({ auth: false, route: false }),
true,
false,
acceleratorState,
true,
);

expect(servingRuntime.spec.tolerations).toBeDefined();
expect(servingRuntime.spec.containers[0].resources?.limits?.['nvidia.com/gpu']).toBe(1);
expect(servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu']).toBe(1);
});

it('should not add tolerations and gpu on kserve', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const servingRuntime = assembleServingRuntime(
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
}),
'test',
mockServingRuntimeK8sResource({ auth: false, route: false }),
true,
false,
acceleratorState,
false,
);

expect(servingRuntime.spec.tolerations).toBeUndefined();
expect(servingRuntime.spec.containers[0].resources?.limits?.['nvidia.com/gpu']).toBeUndefined();
expect(
servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu'],
).toBeUndefined();
});
});
Loading

0 comments on commit 00c677c

Please sign in to comment.