diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2db687a287ef1..fc97e33c19af2 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -49,7 +49,7 @@ jobs: matrix: os: ['ubuntu-20.04'] python-version: ['3.8', '3.9', '3.10', '3.11'] - pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements.txt. + pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements-cuda.txt. cuda-version: ['11.8', '12.1'] steps: diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh index ed200fe724d3e..60a3978f9abd7 100644 --- a/.github/workflows/scripts/build.sh +++ b/.github/workflows/scripts/build.sh @@ -9,7 +9,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH # Install requirements $python_executable -m pip install wheel packaging -$python_executable -m pip install -r requirements.txt +$python_executable -m pip install -r requirements-cuda.txt # Limit the number of parallel jobs to avoid OOM export MAX_JOBS=1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index befd61ff516e0..81a8db2b268b0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,6 @@ Express your support on Twitter if vLLM aids you, or simply offer your appreciat ### Build from source ```bash -pip install -r requirements.txt pip install -e . # This may take several minutes. ``` diff --git a/Dockerfile b/Dockerfile index 71c0646b76465..d1d29177b0f44 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,9 +17,10 @@ RUN ldconfig /usr/local/cuda-12.1/compat/ WORKDIR /workspace # install build and runtime dependencies -COPY requirements.txt requirements.txt +COPY requirements-common.txt requirements-common.txt +COPY requirements-cuda.txt requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements.txt + pip install -r requirements-cuda.txt # install development dependencies COPY requirements-dev.txt requirements-dev.txt @@ -51,7 +52,8 @@ COPY csrc csrc COPY setup.py setup.py COPY cmake cmake COPY CMakeLists.txt CMakeLists.txt -COPY requirements.txt requirements.txt +COPY requirements-common.txt requirements-common.txt +COPY requirements-cuda.txt requirements-cuda.txt COPY pyproject.toml pyproject.toml COPY vllm vllm diff --git a/MANIFEST.in b/MANIFEST.in index aa16da6500e6c..d385f194c6c0f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ include LICENSE -include requirements.txt +include requirements-common.txt +include requirements-cuda.txt include CMakeLists.txt recursive-include cmake * diff --git a/requirements.txt b/requirements-common.txt similarity index 52% rename from requirements.txt rename to requirements-common.txt index ce701a9f82068..9a75cec18bb66 100644 --- a/requirements.txt +++ b/requirements-common.txt @@ -1,20 +1,14 @@ -cmake>=3.21 +cmake >= 3.21 ninja # For faster builds. psutil -ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. numpy -torch == 2.2.1 requests py-cpuinfo transformers >= 4.39.1 # Required for StarCoder2 & Llava. -xformers == 0.0.25 # Requires PyTorch 2.2.1. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 -pynvml == 11.5.0 -triton >= 2.1.0 -outlines == 0.0.34 -tiktoken == 0.6.0 # Required for DBRX tokenizer -vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library +tiktoken == 0.6.0 # Required for DBRX tokenizer +outlines == 0.0.34 # Requires torch >= 2.1.0 diff --git a/requirements-cpu.txt b/requirements-cpu.txt index d80c18be2be03..36d20bc9473ea 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -1,15 +1,6 @@ -cmake>=3.21 -ninja # For faster builds. -psutil -ray >= 2.9 -sentencepiece # Required for LLaMA tokenizer. -numpy -transformers >= 4.38.0 # Required for Gemma. -fastapi -uvicorn[standard] -pydantic >= 2.0 # Required for OpenAI server. -prometheus_client >= 0.18.0 -torch == 2.2.1+cpu -triton >= 2.1.0 -filelock == 3.13.3 -py-cpuinfo \ No newline at end of file +# Common dependencies +-r requirements-common.txt + +# Dependencies for x86_64 CPUs +torch == 2.2.1+cpu +triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error. diff --git a/requirements-cuda.txt b/requirements-cuda.txt new file mode 100644 index 0000000000000..6ee75e8139c04 --- /dev/null +++ b/requirements-cuda.txt @@ -0,0 +1,10 @@ +# Common dependencies +-r requirements-common.txt + +# Dependencies for NVIDIA GPUs +ray >= 2.9 +pynvml == 11.5.0 +vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library +torch == 2.2.1 +xformers == 0.0.25 # Requires PyTorch 2.2.1 +triton >= 2.1.0 diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 6828bd4fd1fce..92b705b4b2d67 100644 --- a/requirements-neuron.txt +++ b/requirements-neuron.txt @@ -1,12 +1,7 @@ -sentencepiece # Required for LLaMA tokenizer. -numpy +# Common dependencies +-r requirements-common.txt + +# Dependencies for Neuron devices transformers-neuronx >= 0.9.0 torch-neuronx >= 2.1.0 neuronx-cc -fastapi -uvicorn[standard] -pydantic >= 2.0 # Required for OpenAI server. -prometheus_client >= 0.18.0 -requests -psutil -py-cpuinfo \ No newline at end of file diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 4e9f598551fee..903845b64d98f 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -1,18 +1,5 @@ -cmake>=3.21 -ninja # For faster builds. -typing-extensions>=4.8.0 -starlette -requests -py-cpuinfo -psutil +# Common dependencies +-r requirements-common.txt + +# Dependencies for AMD GPUs ray == 2.9.3 -sentencepiece # Required for LLaMA tokenizer. -numpy -tokenizers>=0.15.0 -transformers >= 4.39.1 # Required for StarCoder2 & Llava. -fastapi -uvicorn[standard] -pydantic >= 2.0 # Required for OpenAI server. -prometheus_client >= 0.18.0 -outlines == 0.0.34 -tiktoken == 0.6.0 # Required for DBRX tokenizer diff --git a/setup.py b/setup.py index d64af4283863c..98c92f9196e7e 100644 --- a/setup.py +++ b/setup.py @@ -325,32 +325,38 @@ def read_readme() -> str: def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" - if _is_cuda(): - with open(get_path("requirements.txt")) as f: + + def _read_requirements(filename: str) -> List[str]: + with open(get_path(filename)) as f: requirements = f.read().strip().split("\n") - cuda_major = torch.version.cuda.split(".")[0] - modified_requirements = [] - for req in requirements: - if "vllm-nccl-cu12" in req: - modified_requirements.append( - req.replace("vllm-nccl-cu12", - f"vllm-nccl-cu{cuda_major}")) - else: - modified_requirements.append(req) - requirements = modified_requirements + resolved_requirements = [] + for line in requirements: + if line.startswith("-r "): + resolved_requirements += _read_requirements(line.split()[1]) + else: + resolved_requirements.append(line) + return resolved_requirements + + if _is_cuda(): + requirements = _read_requirements("requirements-cuda.txt") + cuda_major = torch.version.cuda.split(".")[0] + modified_requirements = [] + for req in requirements: + if "vllm-nccl-cu12" in req: + modified_requirements.append( + req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}")) + else: + modified_requirements.append(req) + requirements = modified_requirements elif _is_hip(): - with open(get_path("requirements-rocm.txt")) as f: - requirements = f.read().strip().split("\n") + requirements = _read_requirements("requirements-rocm.txt") elif _is_neuron(): - with open(get_path("requirements-neuron.txt")) as f: - requirements = f.read().strip().split("\n") + requirements = _read_requirements("requirements-neuron.txt") elif _is_cpu(): - with open(get_path("requirements-cpu.txt")) as f: - requirements = f.read().strip().split("\n") + requirements = _read_requirements("requirements-cpu.txt") else: raise ValueError( - "Unsupported platform, please use CUDA, ROCM or Neuron.") - + "Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.") return requirements