-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
35 lines (25 loc) · 980 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
## CUDA 12.3.1 "base" image on Ubuntu 22.04.
## NOTE(review): the "base" flavour is the smallest CUDA image; the cuBLAS build of
## llama-cpp-python later in this file may need the "devel" flavour (compiler and
## cuBLAS headers) to actually enable GPU support. Confirm before relying on it.
FROM nvcr.io/nvidia/cuda:12.3.1-base-ubuntu22.04
## WORKDIR creates /app when it does not exist, so no separate mkdir layer is needed.
WORKDIR /app
## Copy the entire build context into /app; later steps read requirements.txt,
## the poetry project files and run_download_llm.py from here.
COPY . /app
## Install Python 3.11 and pip from the Ubuntu archive.
## update and install share one layer so a cached, stale package index is never
## reused by the install; the index is removed in the same layer because deleting
## it in a later layer would not shrink the image.
## Recommended packages are intentionally still installed.
## NOTE(review): pip's recommended build tools are presumably needed to compile
## llama-cpp-python. Confirm before adding --no-install-recommends.
## NOTE(review): python3-pip targets the distribution's default python3, which may
## not be python3.11. Confirm which interpreter pip and `python3` resolve to.
RUN apt-get update \
    && apt-get install -y python3.11 python3-pip \
    && rm -rf /var/lib/apt/lists/*
## Install Python dependencies
## --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir poetry
## NOTE(review): by default poetry installs into its own virtualenv, whereas the pip
## commands in this file target the system interpreter. Confirm that both sets of
## packages are visible to the process that runs in the container.
RUN poetry install
RUN pip install --no-cache-dir -r /app/requirements.txt
## Install llama with cuBLAS
## CMAKE_ARGS is set only for this command; --force-reinstall replaces any copy of
## llama-cpp-python that an earlier dependency step may have installed.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" \
    pip install llama-cpp-python --force-reinstall --no-cache-dir
## A variable exported inside a RUN step exists only in that step's shell and is
## gone when the step ends. ENV records it in the image so that later build steps
## and running containers actually see it.
ENV LLAMA_CUBLAS=1
## set LD_LIBRARY_PATH for SLURM
## Appends the cluster's spack-provided CUDA lib64 directory to the search path.
## NOTE(review): this path names cuda-11.8.0 while the base image is CUDA 12.3.1,
## and it only resolves if /hps is mounted into the container. Confirm both.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/hps/software/spack/opt/spack/linux-rocky8-cascadelake/gcc-11.2.0/cuda-11.8.0-yahvkfc4w3re2xnjepiu6hslxvvx7cmz/lib64
## Download the language model
## mkdir -p tolerates a language_models directory that was already copied in from
## the build context (plain mkdir would abort the build in that case).
## The download passes a fixed --revision hash, and the --allow_patterns glob is
## single-quoted so the shell hands it to the script unexpanded.
RUN mkdir -p ./language_models \
    && python3 ./run_download_llm.py \
        --repo_id TheBloke/Mistral-7B-SciPhi-32k-GGUF \
        --revision c8e8ef4a096a4d516c88fa22951118b87079d454 \
        --allow_patterns '*Q5_K_M.gguf' \
        --local_dir ./language_models
## SLURM specific set-up
## Only one CMD is honoured per image: when several are listed, the last one wins.
## The earlier CMD ["module", "load", "cuda"] therefore never ran and has been
## removed; the effective default command is unchanged.
## NOTE(review): `module` is not installed by any step in this file. If the CUDA
## module is required, presumably it has to be loaded on the SLURM host before the
## container is started. Confirm with the cluster set-up.
CMD ["poetry", "shell"]