Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify Accelerator support for kserve #2261

Merged
merged 1 commit into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ export type ServingRuntime = K8sResourceCommon & {
args: string[];
image: string;
name: string;
resources: ContainerResources;
resources?: ContainerResources;
volumeMounts?: VolumeMount[];
}[];
supportedModelFormats: SupportedModelFormats[];
Expand Down
47 changes: 47 additions & 0 deletions frontend/src/__mocks__/mockAcceleratork8sResource.ts
andrewballantyne marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { AcceleratorKind } from '~/k8sTypes';
import { genUID } from './mockUtils';

/**
 * Overrides accepted by `mockAcceleratork8sResource`.
 * Every field is optional; anything omitted falls back to the defaults below.
 */
type MockResourceConfigType = {
  name?: string;
  namespace?: string;
  displayName?: string;
  identifier?: string;
  enabled?: boolean;
  tolerations?: {
    key: string;
    operator: string;
    effect: string;
  }[];
};

/**
 * Builds a mock AcceleratorProfile custom resource for tests.
 * Defaults describe an NVIDIA GPU profile in the `test-project` namespace.
 */
export const mockAcceleratork8sResource = ({
  name = 'migrated-gpu',
  namespace = 'test-project',
  displayName = 'Nvidia GPU',
  identifier = 'nvidia.com/gpu',
  enabled = true,
  tolerations = [
    {
      key: 'nvidia.com/gpu',
      operator: 'Exists',
      effect: 'NoSchedule',
    },
  ],
}: MockResourceConfigType): AcceleratorKind => {
  // Fixed metadata values mimic what a live cluster would stamp on the object.
  const resource: AcceleratorKind = {
    apiVersion: 'dashboard.opendatahub.io/v1',
    kind: 'AcceleratorProfile',
    metadata: {
      creationTimestamp: '2023-03-17T16:12:41Z',
      generation: 1,
      name,
      namespace,
      resourceVersion: '1309350',
      uid: genUID('service'),
    },
    spec: {
      identifier,
      displayName,
      enabled,
      tolerations,
    },
  };
  return resource;
};
28 changes: 28 additions & 0 deletions frontend/src/__mocks__/mockInferenceServiceModalData.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import {
CreatingInferenceServiceObject,
InferenceServiceStorageType,
} from '~/pages/modelServing/screens/types';

// Overrides for `mockInferenceServiceModalData`; all fields are optional.
type MockResourceConfigType = Partial<CreatingInferenceServiceObject>;

/**
 * Builds the modal-form data used when creating an InferenceService in tests.
 * Defaults model a caikit-format model backed by a new storage data connection.
 */
export const mockInferenceServiceModalData = ({
  name = 'my-inference-service',
  project = 'caikit-example',
  servingRuntimeName = 'caikit',
  storage = {
    type: InferenceServiceStorageType.NEW_STORAGE,
    path: '/caikit-llama',
    dataConnection: 'aws-data-connection',
    awsData: [],
  },
  format = {
    name: 'caikit',
    version: '1.0.0',
  },
}: MockResourceConfigType): CreatingInferenceServiceObject => {
  const data: CreatingInferenceServiceObject = {
    name,
    project,
    servingRuntimeName,
    storage,
    format,
  };
  return data;
};
33 changes: 33 additions & 0 deletions frontend/src/__mocks__/mockServingRuntimeModalData.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { CreatingServingRuntimeObject } from '~/pages/modelServing/screens/types';

// Overrides for `mockServingRuntimeModalData`; all fields are optional.
type MockResourceConfigType = Partial<CreatingServingRuntimeObject>;

/**
 * Builds the modal-form data used when creating a ServingRuntime in tests.
 * Defaults describe a single-replica caikit runtime sized "small", with the
 * external route and token auth both disabled.
 */
export const mockServingRuntimeModalData = ({
  name = 'my-inference-service',
  servingRuntimeTemplateName = 'caikit',
  numReplicas = 1,
  modelSize = {
    name: 'small',
    resources: {
      requests: {
        cpu: '1',
        memory: '1Gi',
      },
      limits: {
        cpu: '1',
        memory: '1Gi',
      },
    },
  },
  externalRoute = false,
  tokenAuth = false,
  tokens = [],
}: MockResourceConfigType): CreatingServingRuntimeObject => {
  const data: CreatingServingRuntimeObject = {
    name,
    servingRuntimeTemplateName,
    numReplicas,
    modelSize,
    externalRoute,
    tokenAuth,
    tokens,
  };
  return data;
};
99 changes: 68 additions & 31 deletions frontend/src/api/__tests__/inferenceServices.spec.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,14 @@
import { mockAcceleratork8sResource } from '~/__mocks__/mockAcceleratork8sResource';
import { mockInferenceServiceModalData } from '~/__mocks__/mockInferenceServiceModalData';
import { assembleInferenceService } from '~/api/k8s/inferenceServices';
import { InferenceServiceStorageType } from '~/pages/modelServing/screens/types';
import { translateDisplayNameForK8s } from '~/pages/projects/utils';
import { AcceleratorState } from '~/utilities/useAcceleratorState';

global.structuredClone = (val: unknown) => JSON.parse(JSON.stringify(val));

describe('assembleInferenceService', () => {
it('should have the right annotations when creating for Kserve', async () => {
const inferenceService = assembleInferenceService({
name: 'my-inference-service',
project: 'caikit-example',
servingRuntimeName: 'caikit',
storage: {
type: InferenceServiceStorageType.NEW_STORAGE,
path: '/caikit-llama',
dataConnection: 'aws-data-connection',
awsData: [],
},
format: {
name: 'caikit',
version: '1.0.0',
},
});
const inferenceService = assembleInferenceService(mockInferenceServiceModalData({}));

expect(inferenceService.metadata.annotations).toBeDefined();
expect(inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe(
Expand All @@ -36,21 +25,7 @@ describe('assembleInferenceService', () => {

it('should have the right annotations when creating for modelmesh', async () => {
const inferenceService = assembleInferenceService(
{
name: 'my-inference-service',
project: 'caikit-example',
servingRuntimeName: 'caikit',
storage: {
type: InferenceServiceStorageType.NEW_STORAGE,
path: '/caikit-llama',
dataConnection: 'aws-data-connection',
awsData: [],
},
format: {
name: 'caikit',
version: '1.0.0',
},
},
mockInferenceServiceModalData({}),
undefined,
undefined,
true,
Expand All @@ -68,4 +43,66 @@ describe('assembleInferenceService', () => {
undefined,
);
});

it('should handle name and display name', async () => {
const displayName = 'Llama model';

const inferenceService = assembleInferenceService(
mockInferenceServiceModalData({ name: displayName }),
);

expect(inferenceService.metadata.annotations).toBeDefined();
expect(inferenceService.metadata.annotations?.['openshift.io/display-name']).toBe(displayName);
expect(inferenceService.metadata.name).toBe(translateDisplayNameForK8s(displayName));
});

it('should add accelerator if kserve and accelerator found', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const inferenceService = assembleInferenceService(
mockInferenceServiceModalData({}),
undefined,
undefined,
false,
undefined,
acceleratorState,
);

expect(inferenceService.spec.predictor.tolerations).toBeDefined();
expect(inferenceService.spec.predictor.tolerations?.[0].key).toBe(
mockAcceleratork8sResource({}).spec.tolerations?.[0].key,
);
expect(inferenceService.spec.predictor.model.resources?.limits?.['nvidia.com/gpu']).toBe(1);
expect(inferenceService.spec.predictor.model.resources?.requests?.['nvidia.com/gpu']).toBe(1);
});

it('should not add accelerator if modelmesh and accelerator found', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const inferenceService = assembleInferenceService(
mockInferenceServiceModalData({}),
undefined,
undefined,
true,
undefined,
acceleratorState,
);

expect(inferenceService.spec.predictor.tolerations).toBeUndefined();
expect(inferenceService.spec.predictor.model.resources).toBeUndefined();
});
});
101 changes: 69 additions & 32 deletions frontend/src/api/__tests__/servingRuntimes.spec.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
import { mockAcceleratork8sResource } from '~/__mocks__/mockAcceleratork8sResource';
import { mockServingRuntimeK8sResource } from '~/__mocks__/mockServingRuntimeK8sResource';
import { mockServingRuntimeModalData } from '~/__mocks__/mockServingRuntimeModalData';
import { mockServingRuntimeTemplateK8sResource } from '~/__mocks__/mockServingRuntimeTemplateK8sResource';
import { assembleServingRuntime } from '~/api/k8s/servingRuntimes';
import { ServingRuntimeKind } from '~/k8sTypes';
import { AcceleratorState } from '~/utilities/useAcceleratorState';

global.structuredClone = (val: unknown) => JSON.parse(JSON.stringify(val));

describe('assembleServingRuntime', () => {
it('should omit enable-xxxx annotations when creating', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test false values
mockServingRuntimeModalData({
externalRoute: false,
tokenAuth: false,
},
}),
'test',
mockServingRuntimeTemplateK8sResource({}).objects[0] as ServingRuntimeKind,
false,
Expand All @@ -31,16 +28,10 @@ describe('assembleServingRuntime', () => {

it('should remove enable-xxxx annotations when editing', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test false values
mockServingRuntimeModalData({
externalRoute: false,
tokenAuth: false,
},
}),
'test',
mockServingRuntimeK8sResource({ auth: true, route: true }),
false,
Expand All @@ -54,16 +45,10 @@ describe('assembleServingRuntime', () => {

it('should add enable-xxxx annotations when creating', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test true values
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
},
}),
'test',
mockServingRuntimeTemplateK8sResource({}).objects[0] as ServingRuntimeKind,
false,
Expand All @@ -77,16 +62,10 @@ describe('assembleServingRuntime', () => {

it('should add enable-xxxx annotations when editing', async () => {
const servingRuntime = assembleServingRuntime(
{
name: 'my-serving-runtime',
servingRuntimeTemplateName: 'ovms',
numReplicas: 2,
modelSize: { name: 'Small', resources: {} },
tokens: [],
// test true values
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
},
}),
'test',
mockServingRuntimeK8sResource({ auth: false, route: false }),
false,
Expand All @@ -97,4 +76,62 @@ describe('assembleServingRuntime', () => {
expect(servingRuntime.metadata.annotations?.['enable-auth']).toBe('true');
expect(servingRuntime.metadata.annotations?.['enable-route']).toBe('true');
});

it('should add tolerations and gpu on modelmesh', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const servingRuntime = assembleServingRuntime(
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
}),
'test',
mockServingRuntimeK8sResource({ auth: false, route: false }),
true,
false,
acceleratorState,
true,
);

expect(servingRuntime.spec.tolerations).toBeDefined();
expect(servingRuntime.spec.containers[0].resources?.limits?.['nvidia.com/gpu']).toBe(1);
expect(servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu']).toBe(1);
});

it('should not add tolerations and gpu on kserve', async () => {
const acceleratorState: AcceleratorState = {
accelerator: mockAcceleratork8sResource({}),
accelerators: [mockAcceleratork8sResource({})],
initialAccelerator: mockAcceleratork8sResource({}),
count: 1,
additionalOptions: {},
useExisting: false,
};

const servingRuntime = assembleServingRuntime(
mockServingRuntimeModalData({
externalRoute: true,
tokenAuth: true,
}),
'test',
mockServingRuntimeK8sResource({ auth: false, route: false }),
true,
false,
acceleratorState,
false,
);

expect(servingRuntime.spec.tolerations).toBeUndefined();
expect(servingRuntime.spec.containers[0].resources?.limits?.['nvidia.com/gpu']).toBeUndefined();
expect(
servingRuntime.spec.containers[0].resources?.requests?.['nvidia.com/gpu'],
).toBeUndefined();
});
});
Loading
Loading