-
Notifications
You must be signed in to change notification settings - Fork 5
/
pipeline.sh
executable file
·135 lines (108 loc) · 3.19 KB
/
pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env bash
#########################
# To test uncommitted changes and/or individual pipeline stages
# replace `dvc repro` with something analogous to:
# dvc exp run \
# -S config.yaml:model_training.pancreas_model1.training_parameters.max_epochs=421 \
# train_pancreas_model1
# TODO: remove npm update ... after container rebuild
# DONE: replace npx ... with cml
# npx github:iterative/cml#b923f3e pr create --skip-ci .
# https://github.com/iterative/cml/issues/1344 resolved in
# https://github.com/iterative/cml/releases/tag/v0.18.21
# TODO: move matplotlib-venn to package manager if persistent
# TODO: move /usr/bin/time install to Docker
#########################
set -x
### Define parallel execution function ###
function run_parallel_pipeline() {
dvc stage list --name-only |\
grep -E "data_download*" |\
xargs -t -n 1 -P 4 bash -c 'sleep $((RANDOM % 8 + 3)); dvc repro "$@"' --
wait
dvc stage list --name-only |\
grep -E "preprocess*" |\
xargs -t -n 1 -P 4 bash -c 'sleep $((RANDOM % 8 + 3)); dvc repro "$@"' --
wait
# manually execute training stages to distribute over four GPUs
dvc repro train@pancreas_model2 &
sleep 2
dvc repro train@pbmc68k_model2 &
sleep 2
dvc repro train@pons_model2 &
sleep 2
dvc repro train@larry_model2 &
wait
dvc repro train@larry_tips_model2 &
sleep 2
dvc repro train@larry_mono_model2 &
sleep 2
dvc repro train@larry_neu_model2 &
sleep 2
dvc repro train@larry_multilineage_model2 &
wait
dvc repro train@pbmc10k_model2 &
wait
dvc stage list --name-only |\
grep -E "postprocess*" |\
xargs -t -n 1 -P 4 bash -c 'sleep $((RANDOM % 8 + 3)); dvc repro "$@"' --
wait
dvc stage list --name-only |\
grep -E "summarize*" |\
xargs -t -n 1 -P 4 bash -c 'sleep $((RANDOM % 8 + 3)); dvc repro "$@"' --
wait
}
### Execute experiment run and submit PR ###
python --version
pip install --upgrade pip
pip --version
pip install -e .[plotting]
pip list
sudo apt-get update && sudo apt-get install -y time && which time
cd reproducibility/figures || exit
dvc pull
run_parallel_pipeline
dvc repro
dvc push
npm update -g @dvcorg/cml
cml pr create --skip-ci .
#########################
generate_markdown() {
local data_set="$1"
local model="$2"
printf "\n# %s\n\n## %s\n\n### Metrics\n" "$data_set" "$model"
dvc metrics show --md "models/${data_set}_${model}/metrics.json"
echo "### Training plots"
echo '!'"[ELBO](./models/${data_set}_${model}/loss_plot.png)"
echo "### Run information"
printf "\n\`\`\`json\n"
cat "models/${data_set}_${model}/run_info.json"
printf "\n\`\`\`\n"
}
### Post experiment report comment ###
{
printf "# pipeline\n"
dvc dag --md
data_sets=(
"pancreas"
"pons"
"pbmc68k"
"pbmc10k"
"larry"
"larry_mono"
"larry_neu"
"larry_multilineage"
"larry_tips"
)
models=("model2")
for data_set in "${data_sets[@]}"; do
for model in "${models[@]}"; do
generate_markdown "$data_set" "$model"
done
done
printf "# pipeline files\n"
dvc dag -o --md
} >> report.md
cml comment update report.md
#########################
set +x