diff --git a/.buildkite/run-xpu-test.sh b/.buildkite/run-xpu-test.sh
index 22a7e76937a76..6ffa66d5ef3d6 100644
--- a/.buildkite/run-xpu-test.sh
+++ b/.buildkite/run-xpu-test.sh
@@ -11,4 +11,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path xpu-test python3 examples/offline_inference.py
+docker run --network host --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test python3 examples/offline_inference.py
diff --git a/.dockerignore b/.dockerignore
index 79fa088fa809c..17ed0d97c88b3 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,6 @@
-vllm/*.so
+/.github/
 /.venv
 /build
 dist
+Dockerfile*
+vllm/*.so
diff --git a/Dockerfile.xpu b/Dockerfile.xpu
index 8471edd16e4bb..83db341556eaf 100644
--- a/Dockerfile.xpu
+++ b/Dockerfile.xpu
@@ -1,4 +1,4 @@
-FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04
+FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04 AS vllm-base
 
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \
     echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \
@@ -7,20 +7,49 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \
     chmod 644 /usr/share/keyrings/intel-graphics.gpg
 
-RUN apt-get update -y && \
-    apt-get install -y curl libicu70 lsb-release git wget vim numactl python3 python3-pip ffmpeg libsm6 libxext6 libgl1
-
-COPY ./ /workspace/vllm
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    curl \
+    ffmpeg \
+    git \
+    libsndfile1 \
+    libsm6 \
+    libxext6 \
+    libgl1 \
+    lsb-release \
+    numactl \
+    python3 \
+    python3-dev \
+    python3-pip \
+    # vim \
+    wget
 
 WORKDIR /workspace/vllm
 
+COPY requirements-xpu.txt /workspace/vllm/requirements-xpu.txt
+COPY requirements-common.txt /workspace/vllm/requirements-common.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -v --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
-    cmake>=3.26 ninja packaging setuptools-scm>=8 wheel jinja2 \
-    -r requirements-xpu.txt
+    pip install --no-cache-dir \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
+    -r requirements-xpu.txt
+
+COPY ./ /workspace/vllm
+
+ENV VLLM_TARGET_DEVICE=xpu
 
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=xpu python3 setup.py install
+    python3 setup.py install
 
 CMD ["/bin/bash"]
+
+FROM vllm-base AS vllm-openai
+
+# install additional dependencies for openai api server
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install accelerate hf_transfer 'modelscope!=1.15.0'
+
+ENV VLLM_USAGE_SOURCE production-docker-image \
+    TRITON_XPU_PROFILE 1
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/requirements-common.txt b/requirements-common.txt
index a9596878a0f89..855169aae5fdf 100644
--- a/requirements-common.txt
+++ b/requirements-common.txt
@@ -1,7 +1,7 @@
 psutil
 sentencepiece  # Required for LLaMA tokenizer.
 numpy < 2.0.0
-requests
+requests >= 2.26.0
 tqdm
 py-cpuinfo
 transformers >= 4.45.0  # Required for Llama 3.2.
diff --git a/requirements-xpu.txt b/requirements-xpu.txt
index 9b21845e084d8..ce83a178c618f 100644
--- a/requirements-xpu.txt
+++ b/requirements-xpu.txt
@@ -1,9 +1,13 @@
 # Common dependencies
 -r requirements-common.txt
-setuptools < 70.0.0 # IPEX's torch have some dependency. to be removed.
-
 ray >= 2.9
+cmake>=3.26
+ninja
+packaging
+setuptools-scm>=8
+wheel
+jinja2
 
 # Following pkgs retrieved from https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 torch == 2.3.1+cxx11.abi
 intel-extension-for-pytorch == 2.3.110+xpu
diff --git a/setup.py b/setup.py
index 8ef759f5245fc..26ed33f897455 100644
--- a/setup.py
+++ b/setup.py
@@ -415,6 +415,8 @@ def _read_requirements(filename: str) -> List[str]:
         for line in requirements:
             if line.startswith("-r "):
                 resolved_requirements += _read_requirements(line.split()[1])
+            elif line.startswith("--"):
+                continue
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index a483614d067e9..c648862b2d757 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -42,6 +42,15 @@
 except Exception:
     pass
 
+is_xpu = False
+
+try:
+    import torch
+    if hasattr(torch, 'xpu') and torch.xpu.is_available():
+        is_xpu = True
+except Exception:
+    pass
+
 is_cpu = False
 try:
     from importlib.metadata import version
@@ -60,6 +69,9 @@
 elif is_rocm:
     from .rocm import RocmPlatform
     current_platform = RocmPlatform()
+elif is_xpu:
+    from .xpu import XPUPlatform
+    current_platform = XPUPlatform()
 elif is_cpu:
     from .cpu import CpuPlatform
     current_platform = CpuPlatform()
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 360590d7d5eb6..7d3de706d14fe 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -8,6 +8,7 @@ class PlatformEnum(enum.Enum):
     CUDA = enum.auto()
     ROCM = enum.auto()
     TPU = enum.auto()
+    XPU = enum.auto()
     CPU = enum.auto()
     UNSPECIFIED = enum.auto()
 
@@ -41,6 +42,9 @@ def is_rocm(self) -> bool:
     def is_tpu(self) -> bool:
         return self._enum == PlatformEnum.TPU
 
+    def is_xpu(self) -> bool:
+        return self._enum == PlatformEnum.XPU
+
     def is_cpu(self) -> bool:
         return self._enum == PlatformEnum.CPU
 
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
new file mode 100644
index 0000000000000..e0f98d745b5e5
--- /dev/null
+++ b/vllm/platforms/xpu.py
@@ -0,0 +1,20 @@
+import torch
+
+from .interface import DeviceCapability, Platform, PlatformEnum
+
+
+class XPUPlatform(Platform):
+    _enum = PlatformEnum.XPU
+
+    @staticmethod
+    def get_device_capability(device_id: int = 0) -> DeviceCapability:
+        return DeviceCapability(major=int(
+            torch.xpu.get_device_capability(device_id)['version'].split('.')
+            [0]),
+                                minor=int(
+                                    torch.xpu.get_device_capability(device_id)
+                                    ['version'].split('.')[1]))
+
+    @staticmethod
+    def get_device_name(device_id: int = 0) -> str:
+        return torch.xpu.get_device_name(device_id)
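A minimal sketch (not part of the diff) of how the platform hooks added above can be exercised, assuming vLLM was installed from this branch with `VLLM_TARGET_DEVICE=xpu` and an XPU-enabled torch build (intel-extension-for-pytorch) is present; names match the new `vllm/platforms/xpu.py` and `vllm/platforms/__init__.py`.

```python
# Sketch only: checks the XPU platform selection and device accessors
# introduced in this change. Assumes torch.xpu is available at import time.
from vllm.platforms import current_platform

if current_platform.is_xpu():
    # DeviceCapability is parsed from torch.xpu.get_device_capability()['version']
    cap = current_platform.get_device_capability()
    print(f"XPU device: {current_platform.get_device_name()} "
          f"(capability {cap.major}.{cap.minor})")
else:
    print("No XPU detected; another platform was selected at import time.")
```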