-
Notifications
You must be signed in to change notification settings - Fork 962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add juicefsruntime dataload function #1539
Changes from 84 commits
1964301
323f39e
c57c026
581852d
2e58404
e69c034
d25d80b
a5611de
d10384c
e02a0e4
197a643
f352d7e
248c461
9e73667
fb4be87
4aae2c4
a4bb7da
6c316c9
3c94ac7
c7063d8
6f8d5e3
49024f8
f2df89a
3810f2c
fffba9f
bcd6bc9
4ffa771
b74f652
747ef31
be3ca71
53de3dc
24c05b5
54b5d4a
3998bad
772273c
65afbf7
aa09642
42c0bde
c8a8fd3
eefc46a
b3554af
cbcacf5
afddbbe
2709928
309c942
d44a69a
a25bd08
23b5c8d
6db2060
1be5ed0
b37e587
45e5d4d
0f23ad9
62f60bf
0c38f4a
8fd42c4
0e493da
cac1fec
c852a78
74a88d5
e2ab343
ef5ca12
acb662b
531c54b
500f2a6
4d402bf
0f76196
ccc2b2d
d17c5cd
4ebe2b5
7976904
7bbb015
a5ceee1
c8bfb12
0b471a0
b2d9f14
67667cb
3f758b3
fde4b07
ba97ed9
17500b7
51a3e6d
8d61be8
e22f938
d40a378
0914e31
c254c0a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
### 0.1.0 | ||
|
||
- Support parallel prefetch job | ||
- Support configurations by setting values |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Chart metadata for the Fluid dataloader chart (Helm chart API v2).
apiVersion: v2
name: fluid-dataloader
description: A Helm chart for Fluid to prefetch data

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# Quoted so the YAML parser always treats it as a string (an unquoted value such as 1.0
# would otherwise be read as a float).
appVersion: "0.1.0"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# ConfigMap holding the shell script that the dataload Job mounts and executes.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ printf "%s-data-load-script" .Release.Name }}
  labels:
    release: {{ .Release.Name }}
    role: dataload-job
data:
  dataloader.distributedLoad: |
    #!/usr/bin/env bash
    #
    # Runs `juicefs warmup` for every path on every pod through `kubectl exec`.
    #
    # Environment read by this script:
    #   DATA_PATH      colon-separated paths; each one is appended to MOUNTPATH
    #   POD_NAMES      colon-separated names of the pods to exec into
    #   POD_NAMESPACE  namespace passed to `kubectl -n`
    #   COMMAND        command line executed in the pod before each warmup
    #                  (presumably mounts the file system at MOUNTPATH - confirm)
    #   MOUNTPATH      prefix of the warmup target; unmounted after each warmup
    set -xe

    function main() {
      local -a paths podNames
      local ns pod path rc

      # Split on ':' only, without glob expansion or whitespace splitting.
      IFS=':' read -r -a paths <<< "$DATA_PATH"
      IFS=':' read -r -a podNames <<< "$POD_NAMES"
      ns="$POD_NAMESPACE"

      for pod in "${podNames[@]}"; do
        # Ignore empty entries produced by doubled separators.
        [[ -n "$pod" ]] || continue

        for path in "${paths[@]}"; do
          [[ -n "$path" ]] || continue

          echo -e "juicefs warmup on $pod $path starts"
          # No -i/-t: the Job container has neither a TTY nor an interactive stdin.
          # COMMAND is intentionally unquoted so it is word-split into command + arguments.
          /usr/local/bin/kubectl -n "$ns" exec "$pod" -- $COMMAND

          # Remember the warmup result instead of letting `set -e` abort here,
          # so the umount below always runs and no mount is left behind in the pod.
          rc=0
          /usr/local/bin/kubectl -n "$ns" exec "$pod" -- juicefs warmup "${MOUNTPATH}${path}" || rc=$?
          /usr/local/bin/kubectl -n "$ns" exec "$pod" -- umount "$MOUNTPATH"

          if [[ "$rc" -ne 0 ]]; then
            echo "juicefs warmup on $pod $path failed with exit code $rc" >&2
            return "$rc"
          fi
          echo -e "juicefs warmup on $pod $path ends"
        done
      done
    }
    main "$@"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# .Release.Name will be used to decide which dataset will be preload | ||
# .Release.Name should be like `<pvc-name>-load`(e.g. hbase-load for a PersistentVolumeClaim named `hbase`) | ||
# TODO: the length of .Release.Name won't exceed 53(limited by Helm), which means length of `<pvc-name>` can't exceed 48. This might be a problem. | ||
{{/* {{ $datasetName := "" -}}*/}} | ||
{{/* {{- $randomSuffix := "" -}}*/}} | ||
{{/* {{- if regexMatch "^[A-Za-z0-9._-]+-load-[A-Za-z0-9]{5}$" .Release.Name -}}*/}} | ||
{{/* {{- $arr := regexSplit "-load-" .Release.Name -1 -}}*/}} | ||
{{/* {{- $datasetName = first $arr -}}*/}} | ||
{{/* {{- $randomSuffix = last $arr -}}*/}} | ||
{{/* {{- else -}}*/}} | ||
{{/* {{- printf "Illegal release name. Should be like <dataset-name>-load-<suffix-length-5>. Current name: %s" .Release.Name | fail -}}*/}} | ||
{{/* {{- end }}*/}} | ||
# Job that runs the dataload script from the `<release>-data-load-script` ConfigMap.
# Entries of `.Values.dataloader.options` are looked up by key; an absent key simply
# produces no env var, volume mount or volume.
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ printf "%s-job" .Release.Name }}
  labels:
    # Label values are quoted so numeric-looking names still render as strings.
    release: {{ .Release.Name | quote }}
    role: dataload-job
    app: juicefs
    targetDataset: {{ required "targetDataset should be set" .Values.dataloader.targetDataset | quote }}
spec:
  backoffLimit: {{ .Values.dataloader.backoffLimit | default "3" }}
  completions: 1
  parallelism: 1
  template:
    metadata:
      name: {{ printf "%s-loader" .Release.Name }}
      labels:
        release: {{ .Release.Name | quote }}
        role: dataload-pod
        app: juicefs
        targetDataset: {{ required "targetDataset should be set" .Values.dataloader.targetDataset | quote }}
    spec:
      restartPolicy: OnFailure
      # The service account is derived from the `runtimeName` option: `<runtimeName>-loader`.
      {{- range $key, $val := .Values.dataloader.options }}
      {{- if eq $key "runtimeName" }}
      serviceAccountName: {{ printf "%s-loader" $val | quote }}
      {{- end }}
      {{- end }}
      containers:
        - name: dataloader
          image: {{ required "Dataloader image should be set" .Values.dataloader.image }}
          imagePullPolicy: IfNotPresent
          command: ["/bin/sh", "-c"]
          args: ["/scripts/juicefs_dataload.sh"]
          {{- /* Join all target paths into one colon-separated string. */}}
          {{- $targetPaths := "" }}
          {{- range .Values.dataloader.targetPaths }}
          {{- $targetPaths = cat $targetPaths (required "Path must be set" .path) ":" }}
          {{- end }}
          {{- $targetPaths = $targetPaths | nospace | trimSuffix ":" }}

          {{- /* Join the per-path replica counts (default 1) the same way. */}}
          {{- $pathReplicas := ""}}
          {{- range .Values.dataloader.targetPaths }}
          {{- $pathReplicas = cat $pathReplicas ( default 1 .replicas ) ":"}}
          {{- end }}
          {{- $pathReplicas = $pathReplicas | nospace | trimSuffix ":"}}

          env:
            - name: STORAGE_ADDRESS
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            # NOTE(review): hard-coded value that looks like a debugging leftover;
            # kept because it is unknown whether anything reads it - confirm and remove.
            - name: CACHEDIR2
              value: /test
            {{- range $key, $val := .Values.dataloader.options }}
            {{- if eq $key "cachedir" }}
            - name: CACHEDIR
              value: {{ $val | quote }}
            {{- end }}
            {{- if eq $key "mountpath" }}
            - name: MOUNTPATH
              value: {{ $val | quote }}
            {{- end }}
            {{- if eq $key "command" }}
            - name: COMMAND
              value: {{ $val | quote }}
            {{- end }}
            {{- end }}
            - name: DATA_PATH
              value: {{ $targetPaths | quote }}
            - name: PATH_REPLICAS
              value: {{ $pathReplicas | quote }}
            {{- range $key, $val := .Values.dataloader.options }}
            {{- if eq $key "podNames" }}
            - name: POD_NAMES
              value: {{ $val | quote }}
            {{- end }}
            {{- end }}
            - name: POD_NAMESPACE
              value: {{ .Release.Namespace | quote }}
          envFrom:
            - configMapRef:
                name: {{ required "targetDataset should be set" .Values.dataloader.targetDataset }}-juicefs-values
          securityContext:
            privileged: true
          volumeMounts:
            - mountPath: /scripts
              name: data-load-script
            {{- range .Values.dataloader.targetPaths }}
            {{- if .fluidNative }}
            - mountPath: {{ .path | trimAll "/" | replace "/" "-" | printf "/data/%s"}}
              name: {{ .path | trimAll "/" | replace "/" "-" | printf "native-%s"}}
            {{- end }}
            {{- end }}
            # Each mount is emitted as a whole (mountPath + name) only when its option
            # is set, so a missing option can no longer leave a dangling `name:` key.
            {{- range $key, $val := .Values.dataloader.options }}
            {{- if eq $key "cachedir" }}
            - mountPath: {{ $val | quote }}
              name: cachedir
            {{- end }}
            {{- if eq $key "mountpath" }}
            - mountPath: {{ $val | quote }}
              name: mountpath1
            {{- end }}
            {{- end }}
      volumes:
        - name: data-load-script
          configMap:
            name: {{ printf "%s-data-load-script" .Release.Name }}
            items:
              - key: dataloader.distributedLoad
                path: juicefs_dataload.sh
                # Decimal 365 == octal 0555 (read + execute for everyone).
                mode: 365
        {{- range .Values.dataloader.targetPaths }}
        {{- if .fluidNative }}
        - name: {{ .path | trimAll "/" | replace "/" "-" | printf "native-%s"}}
          hostPath:
            path: {{ .path }}
        {{- end }}
        {{- end }}
        # Each volume is emitted as a whole (name + hostPath) only when its option is
        # set; a volume without a source would be rejected by the API server.
        {{- range $key, $val := .Values.dataloader.options }}
        {{- if eq $key "cachedir" }}
        - name: cachedir
          hostPath:
            path: {{ $val | quote }}
        {{- end }}
        {{- if eq $key "mountpath" }}
        - name: mountpath1
          hostPath:
            path: {{ $val | quote }}
        {{- end }}
        {{- end }}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Default values for fluid-dataloader. | ||
# This is a YAML-formatted file. | ||
# Declare variables to be passed into your templates. | ||
|
||
dataloader:
  # Optional
  # Default: 3
  # Description: how many times the prefetch job can fail, i.e. `Job.spec.backoffLimit`
  backoffLimit: 3

  # Required
  # Description: the dataset that this DataLoad targets (e.g. imagenet)
  # Left as an explicit empty string; the Job template rejects an empty value.
  targetDataset: ""

  # Optional
  # Default: false
  # Description: should load metadata from UFS when doing data load
  loadMetadata: false

  # Optional
  # Default: (path: "/", replicas: 1, fluidNative: false)
  # Description: which paths should the DataLoad load
  targetPaths:
    - path: "/"
      replicas: 1
      fluidNative: false

  # Required
  # Description: the image that the DataLoad job uses (e.g. <juicefs-image>)
  # Left as an explicit empty string; the Job template rejects an empty value.
  image: ""

  # Optional
  # Description: optional parameter DataLoad job uses
  # The Job template reads the keys runtimeName, cachedir, mountpath, command and podNames.
  options: {}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,12 +46,6 @@ rules: | |
- juicefsruntimes/status | ||
verbs: | ||
- '*' | ||
- apiGroups: | ||
- "" | ||
resources: | ||
- events | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
verbs: | ||
- '*' | ||
- apiGroups: | ||
- apps | ||
resources: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
---
# Service account named `<release>-loader` in the release namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ printf "%s-loader" .Release.Name }}
  namespace: {{ .Release.Namespace | quote }}
---
# Cluster-wide permissions on pods and pods/exec for the loader.
# NOTE(review): the verbs apply to both resources, so `create` is granted on
# `pods` as well as on `pods/exec` - confirm that this is intended.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ printf "%s-loader" .Release.Name }}
rules:
  - apiGroups: [""]
    resources: ["pods", "pods/exec"]
    verbs: ["get", "create", "list"]
---
# Binds the ClusterRole above to the loader service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ printf "%s-loader" .Release.Name }}
subjects:
  - kind: ServiceAccount
    name: {{ printf "%s-loader" .Release.Name }}
    namespace: {{ .Release.Namespace | quote }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ printf "%s-loader" .Release.Name }}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,24 +59,53 @@ spec: | |
{{- end }} | ||
{{- end }} | ||
command: ["sh", "-c", "sleep infinity"] | ||
securityContext: | ||
privileged: true | ||
{{- if .Values.worker.ports }} | ||
ports: | ||
{{ toYaml .Values.worker.ports | trim | indent 10 }} | ||
{{- end }} | ||
{{- if .Values.worker.envs }} | ||
env: | ||
{{- if .Values.worker.envs }} | ||
{{ toYaml .Values.worker.envs | trim | indent 10 }} | ||
{{- end }} | ||
{{- if .Values.worker.cacheDir }} | ||
{{- if .Values.fuse.metaurlSecret }} | ||
- name: METAURL | ||
valueFrom: | ||
secretKeyRef: | ||
name: {{ .Values.fuse.metaurlSecret }} | ||
key: metaurl | ||
{{- end }} | ||
{{- if .Values.fuse.accesskeySecret }} | ||
- name: ACCESS_KEY | ||
valueFrom: | ||
secretKeyRef: | ||
name: {{ .Values.fuse.accesskeySecret }} | ||
key: access-key | ||
{{- end }} | ||
{{- if .Values.fuse.secretkeySecret }} | ||
- name: SECRET_KEY | ||
valueFrom: | ||
secretKeyRef: | ||
name: {{ .Values.fuse.secretkeySecret }} | ||
key: secret-key | ||
{{- end }} | ||
volumeMounts: | ||
- mountPath: /root/script | ||
name: script | ||
{{- if .Values.worker.cacheDir }} | ||
- name: cache-dir | ||
mountPath: {{ .Values.worker.cacheDir }} | ||
{{- end }} | ||
{{- end }} | ||
restartPolicy: Always | ||
{{- if .Values.worker.cacheDir }} | ||
volumes: | ||
{{- if .Values.worker.cacheDir }} | ||
- name: cache-dir | ||
hostPath: | ||
path: {{ .Values.worker.cacheDir }} | ||
type: DirectoryOrCreate | ||
{{- end }} | ||
{{- end }} | ||
- name: script | ||
configMap: | ||
name: {{ template "juicefs.fullname" . }}-script | ||
defaultMode: 0777 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not setting There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will you add `timeout` to avoid the job hang?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done