diff --git a/.github/workflows/tests_preview.yml b/.github/workflows/tests_preview.yml
index b8feed0b4e..94e5421aec 100644
--- a/.github/workflows/tests_preview.yml
+++ b/.github/workflows/tests_preview.yml
@@ -219,7 +219,9 @@ jobs:
 
   integration-tests-macos:
     name: Integration / macos-latest
     needs: unit-tests
-    runs-on: macos-latest
+    runs-on: macos-latest-xl
+    env:
+      HAYSTACK_MPS_ENABLED: false
     steps:
       - uses: actions/checkout@v4
diff --git a/e2e/modeling/test_dpr.py b/e2e/modeling/test_dpr.py
index 57016f2fe7..281d33a197 100644
--- a/e2e/modeling/test_dpr.py
+++ b/e2e/modeling/test_dpr.py
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from pathlib import Path
+import os
 
 import numpy as np
 import pytest
@@ -707,6 +708,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa
 
     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
@@ -753,6 +760,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa
 
     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     loaded_model = BiAdaptiveModel(
@@ -879,6 +892,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa
 
     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
diff --git a/haystack/environment.py b/haystack/environment.py
index 20d63c31f6..60f4304a94 100644
--- a/haystack/environment.py
+++ b/haystack/environment.py
@@ -106,11 +106,16 @@ def collect_static_system_specs() -> Dict[str, Any]:
 
     try:
         torch_import.check()
+        has_mps = (
+            hasattr(torch.backends, "mps")
+            and torch.backends.mps.is_available()
+            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+        )
         specs.update(
             {
                 "libraries.torch": torch.__version__,
                 "libraries.cuda": torch.version.cuda if torch.cuda.is_available() else False,
-                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
             }
         )
     except ImportError:
diff --git a/haystack/modeling/utils.py b/haystack/modeling/utils.py
index 8c849823d8..c2dff09cdb 100644
--- a/haystack/modeling/utils.py
+++ b/haystack/modeling/utils.py
@@ -112,6 +112,13 @@ def initialize_device_settings(
             else:
                 devices_to_use = [torch.device("cuda:0")]
                 n_gpu = 1
+        elif (
+            hasattr(torch.backends, "mps")
+            and torch.backends.mps.is_available()
+            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+        ):
+            devices_to_use = [torch.device("mps")]
+            n_gpu = 1
         else:
             devices_to_use = [torch.device("cpu")]
             n_gpu = 0
@@ -180,6 +187,7 @@ def all_gather_list(data, group=None, max_size=16384):
         data (Any): data from the local worker to be gathered on other workers
         group (optional): group of the collective
     """
+    # pylint: disable=all
     SIZE_STORAGE_BYTES = 4  # int32 to encode the payload size
 
     enc = pickle.dumps(data)
diff --git a/haystack/preview/components/readers/extractive.py b/haystack/preview/components/readers/extractive.py
index 53324e8407..8444bf8a3f 100644
--- a/haystack/preview/components/readers/extractive.py
+++ b/haystack/preview/components/readers/extractive.py
@@ -2,6 +2,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 import math
 import warnings
+import os
 
 from haystack.preview import component, default_to_dict, ComponentError, Document, ExtractedAnswer
 from haystack.preview.lazy_imports import LazyImport
@@ -111,6 +112,12 @@ def warm_up(self):
         if self.model is None:
             if torch.cuda.is_available():
                 self.device = self.device or "cuda:0"
+            elif (
+                hasattr(torch.backends, "mps")
+                and torch.backends.mps.is_available()
+                and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+            ):
+                self.device = self.device or "mps:0"
             else:
                 self.device = self.device or "cpu:0"
             self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name_or_path, token=self.token).to(
diff --git a/haystack/utils/experiment_tracking.py b/haystack/utils/experiment_tracking.py
index 5715139908..2a9f8d1ef4 100644
--- a/haystack/utils/experiment_tracking.py
+++ b/haystack/utils/experiment_tracking.py
@@ -17,7 +17,7 @@ import transformers
 
 with LazyImport("Run 'pip install farm-haystack[metrics]'") as mlflow_import:
-    import mlflow
+    import mlflow  # pylint: disable=import-error
 
 logger = logging.getLogger(__name__)
@@ -236,6 +236,11 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
     from haystack.telemetry import HAYSTACK_EXECUTION_CONTEXT
 
     global env_meta_data  # pylint: disable=global-statement
+    has_mps = (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    )
     if not env_meta_data:
         env_meta_data = {
             "os_version": platform.release(),
@@ -246,7 +251,7 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
             "transformers_version": transformers.__version__,
             "torch_version": torch.__version__,
             "torch_cuda_version": torch.version.cuda if torch.cuda.is_available() else 0,
-            "n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+            "n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
             "n_cpu": os.cpu_count(),
             "context": os.environ.get(HAYSTACK_EXECUTION_CONTEXT),
             "execution_env": _get_execution_environment(),
diff --git a/haystack/utils/torch_utils.py b/haystack/utils/torch_utils.py
index d1344b8fba..01aaba8023 100644
--- a/haystack/utils/torch_utils.py
+++ b/haystack/utils/torch_utils.py
@@ -1,4 +1,5 @@
 from typing import Optional, List, Union
+import os
 
 import torch
 from torch.utils.data import Dataset
@@ -44,4 +45,10 @@ def get_devices(devices: Optional[List[Union[str, torch.device]]]) -> List[torch
         return [torch.device(device) for device in devices]
     elif torch.cuda.is_available():
         return [torch.device(device) for device in range(torch.cuda.device_count())]
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        return [torch.device("mps")]
     return [torch.device("cpu")]
diff --git a/releasenotes/notes/add-apple-silicon-gpu-acceleration-38bf69781a933b95.yaml b/releasenotes/notes/add-apple-silicon-gpu-acceleration-38bf69781a933b95.yaml
new file mode 100644
index 0000000000..10df92afb2
--- /dev/null
+++ b/releasenotes/notes/add-apple-silicon-gpu-acceleration-38bf69781a933b95.yaml
@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Added support for Apple Silicon GPU acceleration through PyTorch's "mps" backend, enabling better performance on Apple M1 hardware.