Skip to content

Commit

Permalink
added accelerator UI user flow
Browse files Browse the repository at this point in the history
fixed detected accelerator count

connected accelerator detection

added accelerator UI user flow

hide accelerator dropdown when empty

switched the format of the notebook identifier

added accelerator name to serving runtime resource

added serving runtimes accelerators
  • Loading branch information
Gkrumbach07 committed Aug 7, 2023
1 parent 4b2f50b commit 9387956
Show file tree
Hide file tree
Showing 40 changed files with 708 additions and 280 deletions.
7 changes: 7 additions & 0 deletions backend/src/routes/api/gpu/gpuUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ const storage: { lastFetch: number; lastValue: GPUInfo } = {
lastFetch: 0,
};

/**
* @deprecated
*/
export const getGPUNumber = async (fastify: KubeFastifyInstance): Promise<GPUInfo> => {
if (storage.lastFetch >= Date.now() - 30_000) {
fastify.log.info(`Returning cached gpu value (${JSON.stringify(storage)})`);
Expand Down Expand Up @@ -67,11 +70,15 @@ export const getGPUNumber = async (fastify: KubeFastifyInstance): Promise<GPUInf
available: maxGpuNumber,
autoscalers: scalingLimit,
};

storage.lastFetch = Date.now();
storage.lastValue = data;
return data;
};

/**
* @deprecated
*/
export const getGPUData = async (
fastify: KubeFastifyInstance,
podIP: string,
Expand Down
1 change: 0 additions & 1 deletion docs/dashboard_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ The `notebookController` field controls the Notebook Controller options such as
```yaml
notebookController:
enabled: true
gpuSetting: autodetect
pvcSize: 20Gi
notebookNamespace: odh-notebooks
notebookTolerationSettings:
Expand Down
1 change: 1 addition & 0 deletions frontend/src/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export * from './k8s/users';
export * from './k8s/groups';
export * from './k8s/templates';
export * from './k8s/dashboardConfig';
export * from './k8s/accelerators';

// Pipelines uses special redirected API
export * from './pipelines/custom';
Expand Down
8 changes: 8 additions & 0 deletions frontend/src/api/k8s/accelerators.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { k8sListResource } from '@openshift/dynamic-plugin-sdk-utils';
import { AcceleratorKind } from '~/k8sTypes';
import { AcceleratorModel } from '~/api/models';

/**
 * Lists the resources described by `AcceleratorModel`.
 *
 * @returns the `items` array taken from the list response.
 */
export const listAccelerators = async (): Promise<AcceleratorKind[]> => {
  const { items } = await k8sListResource<AcceleratorKind>({ model: AcceleratorModel });
  return items;
};
7 changes: 4 additions & 3 deletions frontend/src/api/k8s/notebooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const assembleNotebook = (
description,
notebookSize,
envFrom,
gpus,
accelerator,
image,
volumes: formVolumes,
volumeMounts: formVolumeMounts,
Expand All @@ -51,7 +51,7 @@ const assembleNotebook = (

const { affinity, tolerations, resources } = assemblePodSpecOptions(
notebookSize.resources,
gpus,
accelerator,
tolerationSettings,
);

Expand Down Expand Up @@ -88,6 +88,7 @@ const assembleNotebook = (
'notebooks.opendatahub.io/last-image-selection': imageSelection,
'notebooks.opendatahub.io/inject-oauth': 'true',
'opendatahub.io/username': username,
'opendatahub.io/accelerator-name': accelerator.accelerator?.metadata.name || '',
},
name: notebookId,
namespace: projectName,
Expand Down Expand Up @@ -260,7 +261,7 @@ export const updateNotebook = (

// clean the envFrom array in case of merging the old value again
container.envFrom = [];
// clean the resources, affinity and tolerations for GPU
// clean the resources, affinity and tolerations for accelerator
oldNotebook.spec.template.spec.tolerations = [];
oldNotebook.spec.template.spec.affinity = {};
container.resources = {};
Expand Down
9 changes: 7 additions & 2 deletions frontend/src/api/k8s/servingRuntimes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const assembleServingRuntime = (
isCustomServingRuntimesEnabled: boolean,
isEditing?: boolean,
): ServingRuntimeKind => {
const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth, gpus } = data;
const { name: displayName, numReplicas, modelSize, externalRoute, tokenAuth, accelerator } = data;
const createName = isCustomServingRuntimesEnabled
? translateDisplayNameForK8s(displayName)
: getModelServingRuntimeName(namespace);
Expand All @@ -50,6 +50,7 @@ const assembleServingRuntime = (
}),
...(isCustomServingRuntimesEnabled && {
'opendatahub.io/template-display-name': getDisplayNameFromK8sResource(servingRuntime),
'opendatahub.io/accelerator-name': accelerator.accelerator?.metadata.name || '',
}),
},
};
Expand All @@ -60,6 +61,7 @@ const assembleServingRuntime = (
...updatedServingRuntime.metadata.annotations,
'enable-route': externalRoute ? 'true' : 'false',
'enable-auth': tokenAuth ? 'true' : 'false',
'opendatahub.io/accelerator-name': accelerator.accelerator?.metadata.name || '',
...(isCustomServingRuntimesEnabled && { 'openshift.io/display-name': displayName }),
},
};
Expand All @@ -77,7 +79,10 @@ const assembleServingRuntime = (
},
};

const { affinity, tolerations, resources } = assemblePodSpecOptions(resourceSettings, gpus);
const { affinity, tolerations, resources } = assemblePodSpecOptions(
resourceSettings,
accelerator,
);

updatedServingRuntime.spec.containers = servingRuntime.spec.containers.map((container) => ({
...container,
Expand Down
47 changes: 21 additions & 26 deletions frontend/src/api/k8s/utils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { AcceleratorState } from '~/pages/projects/screens/detail/notebooks/useNotebookAccelerator';
import {
PodAffinity,
ContainerResources,
Expand All @@ -9,48 +10,42 @@ import { determineTolerations } from '~/utilities/tolerations';

export const assemblePodSpecOptions = (
resourceSettings: ContainerResources,
gpus: number,
accelerator: AcceleratorState,
tolerationSettings?: TolerationSettings,
affinitySettings?: PodAffinity,
): {
affinity: PodAffinity;
tolerations: PodToleration[];
resources: ContainerResources;
} => {
let affinity: PodAffinity = structuredClone(affinitySettings || {});
const affinity: PodAffinity = structuredClone(affinitySettings || {});
const resources = structuredClone(resourceSettings);
if (gpus > 0) {
if (accelerator.count > 0 && accelerator.accelerator) {
if (!resources.limits) {
resources.limits = {};
}
if (!resources.requests) {
resources.requests = {};
}
resources.limits[ContainerResourceAttributes.NVIDIA_GPU] = gpus;
resources.requests[ContainerResourceAttributes.NVIDIA_GPU] = gpus;
resources.limits[accelerator.accelerator.spec.identifier] = accelerator.count;
resources.requests[accelerator.accelerator.spec.identifier] = accelerator.count;
} else {
delete resources.limits?.[ContainerResourceAttributes.NVIDIA_GPU];
delete resources.requests?.[ContainerResourceAttributes.NVIDIA_GPU];
affinity = {
nodeAffinity: {
preferredDuringSchedulingIgnoredDuringExecution: [
{
preference: {
matchExpressions: [
{
key: 'nvidia.com/gpu.present',
operator: 'NotIn',
values: ['true'],
},
],
},
weight: 1,
},
],
},
};
// No accelerator is selected: drop every limit/request entry that is not a
// standard container resource, so a previously assigned accelerator is removed.
// The allow-list must hold the enum's VALUES, because those are the strings used
// as keys in limits/requests. Object.keys() on an enum yields the member names,
// which would not match and would strip the standard entries as well.
// NOTE(review): assumes ContainerResourceAttributes is a string enum whose values
// are the resource names (e.g. the cpu/memory keys) — confirm against its declaration.
// step type down to string to avoid type errors
const containerResourceKeys: string[] = Object.values(ContainerResourceAttributes);

Object.keys(resources.limits || {}).forEach((key) => {
  if (!containerResourceKeys.includes(key)) {
    delete resources.limits?.[key];
  }
});

Object.keys(resources.requests || {}).forEach((key) => {
  if (!containerResourceKeys.includes(key)) {
    delete resources.requests?.[key];
  }
});
}

const tolerations = determineTolerations(gpus > 0, tolerationSettings);
const tolerations = determineTolerations(tolerationSettings, accelerator.accelerator);
return { affinity, tolerations, resources };
};
7 changes: 7 additions & 0 deletions frontend/src/api/models/openShift.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,10 @@ export const TemplateModel: K8sModelCommon = {
kind: 'Template',
plural: 'templates',
};

/**
 * Model descriptor for the `AcceleratorProfile` resource in the
 * `dashboard.opendatahub.io` API group.
 */
export const AcceleratorModel: K8sModelCommon = {
apiVersion: 'v1alpha',
apiGroup: 'dashboard.opendatahub.io',
kind: 'AcceleratorProfile',
plural: 'acceleratorprofiles',
};
3 changes: 3 additions & 0 deletions frontend/src/components/SimpleDropdownSelect.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Stretches the element to the full width of its container.
.full-width {
width: 100%;
}
52 changes: 37 additions & 15 deletions frontend/src/components/SimpleDropdownSelect.tsx
Original file line number Diff line number Diff line change
@@ -1,42 +1,64 @@
import * as React from 'react';
import { Dropdown, DropdownItem, DropdownToggle } from '@patternfly/react-core';
import './SimpleDropdownSelect.scss';

type SimpleDropdownProps = {
options: { key: string; label: React.ReactNode }[];
options: {
key: string;
label: React.ReactNode;
description?: React.ReactNode;
selectedLabel?: React.ReactNode;
isPlaceholder?: boolean;
}[];
value: string;
placeholder?: string;
onChange: (key: string) => void;
onChange: (key: string, isPlaceholder: boolean) => void;
isFullWidth?: boolean;
isDisabled?: boolean;
} & Omit<React.ComponentProps<typeof Dropdown>, 'isOpen' | 'toggle' | 'dropdownItems' | 'onChange'>;

const SimpleDropdownSelect: React.FC<SimpleDropdownProps> = ({
onChange,
options,
placeholder = 'Select...',
value,
isFullWidth,
isDisabled,
...props
}) => {
const [open, setOpen] = React.useState(false);

const selectedOption = options.find(({ key }) => key === value);
const selectedLabel = selectedOption?.selectedLabel ?? selectedOption?.label ?? placeholder;

return (
<Dropdown
{...props}
isOpen={open}
className={isFullWidth ? 'full-width' : undefined}
toggle={
<DropdownToggle onToggle={() => setOpen(!open)}>
<>{options.find(({ key }) => key === value)?.label ?? placeholder}</>
<DropdownToggle
isDisabled={isDisabled}
className={isFullWidth ? 'full-width' : undefined}
onToggle={() => setOpen(!open)}
>
<>{selectedLabel}</>
</DropdownToggle>
}
dropdownItems={options.map(({ key, label }) => (
<DropdownItem
key={key}
onClick={() => {
onChange(key);
setOpen(false);
}}
>
{label}
</DropdownItem>
))}
dropdownItems={
  // Sort a copy: Array.prototype.sort reorders in place, and `options` is a
  // prop owned by the caller that must not be mutated during render.
  // Coercing the optional flag to 0/1 keeps the comparator consistent when
  // `false` and `undefined` are mixed. Placeholder entries are listed first;
  // all other entries keep the order the caller supplied.
  [...options]
    .sort((a, b) => Number(!!b.isPlaceholder) - Number(!!a.isPlaceholder))
    .map(({ key, label, description, isPlaceholder }) => (
      <DropdownItem
        key={key}
        description={description}
        onClick={() => {
          onChange(key, !!isPlaceholder);
          setOpen(false);
        }}
      >
        {isPlaceholder ? <i>{label}</i> : label}
      </DropdownItem>
    ))
}
/>
);
};
Expand Down
24 changes: 23 additions & 1 deletion frontend/src/k8sTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ type DisplayNameAnnotations = Partial<{

export type K8sDSGResource = K8sResourceCommon & {
metadata: {
annotations?: DisplayNameAnnotations;
annotations?: DisplayNameAnnotations &
Partial<{
'opendatahub.io/recommended-accelerators': string;
}>;
name: string;
};
};
Expand All @@ -69,6 +72,7 @@ export type NotebookAnnotations = Partial<{
'opendatahub.io/username': string; // the untranslated username behind the notebook
'notebooks.opendatahub.io/last-image-selection': string; // the last image they selected
'notebooks.opendatahub.io/last-size-selection': string; // the last notebook size they selected
'opendatahub.io/accelerator-name': string; // the accelerator attached to the notebook
}>;

export type DashboardLabels = {
Expand All @@ -91,6 +95,8 @@ export type ServingRuntimeAnnotations = Partial<{
'opendatahub.io/template-name': string;
'opendatahub.io/template-display-name': string;
'opendatahub.io/disable-gpu': string;
'opendatahub.io/recommended-accelerators': string;
'opendatahub.io/accelerator-name': string;
'enable-route': string;
'enable-auth': string;
}>;
Expand Down Expand Up @@ -715,3 +721,19 @@ export type DashboardConfigKind = K8sResourceCommon & {
templateOrder?: string[];
};
};

/**
 * Shape of an accelerator resource: common Kubernetes resource fields plus a
 * named `metadata` block and the accelerator `spec`.
 */
export type AcceleratorKind = K8sResourceCommon & {
metadata: {
name: string; // the resource name; stored in the 'opendatahub.io/accelerator-name' annotation
annotations?: Partial<{
'opendatahub.io/modified-date': string;
}>;
};
spec: {
displayName: string; // human-readable name shown in the UI
enabled: boolean;
identifier: string; // resource key written into container limits/requests
description?: string;
tolerations?: PodToleration[];
};
};
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import {
} from '@patternfly/react-core';
import { ServingRuntimeKind } from '~/k8sTypes';
import { AppContext } from '~/app/AppContext';
import { ContainerResourceAttributes } from '~/types';
import { getServingRuntimeSizes } from './utils';
import useServingRuntimeAccelerator from './useServingRuntimeAccelerator';

type ServingRuntimeDetailsProps = {
obj: ServingRuntimeKind;
Expand All @@ -22,6 +22,7 @@ const ServingRuntimeDetails: React.FC<ServingRuntimeDetailsProps> = ({ obj }) =>
const container = obj.spec.containers[0]; // can we assume the first container?
const sizes = getServingRuntimeSizes(dashboardConfig);
const size = sizes.find((size) => _.isEqual(size.resources, container.resources));
const [accelerator] = useServingRuntimeAccelerator(obj);

return (
<DescriptionList isHorizontal horizontalTermWidthModifier={{ default: '250px' }}>
Expand All @@ -44,11 +45,15 @@ const ServingRuntimeDetails: React.FC<ServingRuntimeDetailsProps> = ({ obj }) =>
</DescriptionListDescription>
</DescriptionListGroup>
<DescriptionListGroup>
<DescriptionListTerm>Number of GPUs</DescriptionListTerm>
<DescriptionListTerm>Accelerator</DescriptionListTerm>
<DescriptionListDescription>
{container.resources.limits?.[ContainerResourceAttributes.NVIDIA_GPU] || 0}
{accelerator.accelerator?.spec.displayName || 'unknown'}
</DescriptionListDescription>
</DescriptionListGroup>
<DescriptionListGroup>
<DescriptionListTerm>Number of accelerators</DescriptionListTerm>
<DescriptionListDescription>{accelerator.count}</DescriptionListDescription>
</DescriptionListGroup>
</DescriptionList>
);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ const ManageServingRuntimeModal: React.FC<ManageServingRuntimeModalProps> = ({
}
const servingRuntimeData = {
...createData,
gpus: isGpuDisabled(servingRuntimeSelected) ? 0 : createData.gpus,
accelerator: isGpuDisabled(servingRuntimeSelected)
? { accelerator: undefined, count: 0 }
: createData.accelerator,
};
const servingRuntimeName = translateDisplayNameForK8s(servingRuntimeData.name);
const createRolebinding = servingRuntimeData.tokenAuth && allowCreate;
Expand Down
Loading

0 comments on commit 9387956

Please sign in to comment.