Skip to content

Commit

Permalink
Add offline support for LMEvalJobs (#351)
Browse files Browse the repository at this point in the history
  • Loading branch information
ruivieira authored Nov 4, 2024
1 parent af34422 commit f5163e9
Show file tree
Hide file tree
Showing 6 changed files with 417 additions and 0 deletions.
17 changes: 17 additions & 0 deletions api/lmes/v1alpha1/lmevaljob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,16 @@ func (p *LMEvalPodSpec) GetSideCards() []corev1.Container {
return p.SideCars
}

// OfflineStorageSpec defines the storage configuration for LMEvalJob's offline mode.
// A PersistentVolumeClaim is the only storage backend it can describe.
type OfflineStorageSpec struct {
	// PersistentVolumeClaimName is the name of the PersistentVolumeClaim used as
	// offline storage. It is serialized as "pvcName" and has no omitempty, so it
	// is always present in the encoded spec.
	PersistentVolumeClaimName string `json:"pvcName"`
}

// OfflineSpec defines the configuration for LMEvalJob's offline mode.
type OfflineSpec struct {
	// StorageSpec describes the storage backing the offline run. It is
	// serialized as "storage" and has no omitempty, so it is always present
	// in the encoded spec.
	StorageSpec OfflineStorageSpec `json:"storage"`
}

// LMEvalJobSpec defines the desired state of LMEvalJob
type LMEvalJobSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
Expand Down Expand Up @@ -257,6 +267,13 @@ type LMEvalJobSpec struct {
// Outputs specifies storage for evaluation results
// +optional
Outputs *Outputs `json:"outputs,omitempty"`
// Offline specifies settings for running LMEvalJobs in an offline mode
Offline *OfflineSpec `json:"offline,omitempty"`
}

// IsOffline reports whether this LMEvalJob is configured to run offline,
// i.e. whether the spec carries an Offline section.
//
// It is safe to call on a nil receiver, in which case it returns false
// instead of panicking on the field access.
func (s *LMEvalJobSpec) IsOffline() bool {
	return s != nil && s.Offline != nil
}

// HasCustomOutput returns whether an LMEvalJobSpec defines custom outputs or not
Expand Down
36 changes: 36 additions & 0 deletions api/lmes/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ spec:
numFewShot:
description: Sets the number of few-shot examples to place in context
type: integer
offline:
description: Offline specifies settings for running LMEvalJobs in
an offline mode
properties:
storage:
description: OfflineStorageSpec defines the storage configuration
for LMEvalJob's offline mode
properties:
pvcName:
type: string
required:
- pvcName
type: object
required:
- storage
type: object
outputs:
description: Outputs specifies storage for evaluation results
properties:
Expand Down
1 change: 1 addition & 0 deletions controllers/lmes/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
DriverPath = "/bin/driver"
DestDriverPath = "/opt/app-root/src/bin/driver"
OutputPath = "/opt/app-root/src/output"
HuggingFaceHomePath = "/opt/app-root/src/hf_home"
PodImageKey = "lmes-pod-image"
DriverImageKey = "lmes-driver-image"
PodCheckingIntervalKey = "lmes-pod-checking-interval"
Expand Down
35 changes: 35 additions & 0 deletions controllers/lmes/lmevaljob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,41 @@ func createPod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo
volumes = append(volumes, outputPVC)
}

// If the job is supposed to run offline, set the appropriate HuggingFace offline flags
if job.Spec.IsOffline() {

offlineHuggingFaceEnvVars := []corev1.EnvVar{
{
Name: "HF_DATASETS_OFFLINE",
Value: "1",
},
{
Name: "HF_HUB_OFFLINE",
Value: "1",
},
}
envVars = append(envVars, offlineHuggingFaceEnvVars...)

// If the job is offline, a storage must be set. PVC is the only supported storage backend at the moment.
offlinePVCMount := corev1.VolumeMount{
Name: "offline",
MountPath: HuggingFaceHomePath,
}
volumeMounts = append(volumeMounts, offlinePVCMount)

offlinePVC := corev1.Volume{
Name: "offline",
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: job.Spec.Offline.StorageSpec.PersistentVolumeClaimName,
ReadOnly: false,
},
},
}
volumes = append(volumes, offlinePVC)

}

volumes = append(volumes, job.Spec.Pod.GetVolumes()...)
volumeMounts = append(volumeMounts, job.Spec.Pod.GetContainer().GetVolumMounts()...)
labels := getPodLabels(job.Labels, log)
Expand Down
Loading

0 comments on commit f5163e9

Please sign in to comment.