feat: add apple silicon GPU acceleration #6151

Merged (24 commits) on Oct 30, 2023
4 changes: 3 additions & 1 deletion .github/workflows/tests_preview.yml
@@ -219,7 +219,9 @@ jobs:
   integration-tests-macos:
     name: Integration / macos-latest
     needs: unit-tests
-    runs-on: macos-latest
+    runs-on: macos-latest-xl
+    env:
+      HAYSTACK_MPS_ENABLED : false
     steps:
       - uses: actions/checkout@v4

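Note: the integration job both moves to the larger macos-latest-xl runner and force-disables the new MPS path via HAYSTACK_MPS_ENABLED: false, presumably because GitHub-hosted macOS runners do not expose a usable Metal GPU to PyTorch; the auto-detection added below must therefore not engage under CI.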
19 changes: 19 additions & 0 deletions e2e/modeling/test_dpr.py
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from pathlib import Path
+import os

 import numpy as np
 import pytest
@@ -707,6 +708,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
@@ -753,6 +760,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     loaded_model = BiAdaptiveModel(
@@ -879,6 +892,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
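Note: this CUDA, then MPS, then CPU cascade is repeated verbatim in three tests above (and again in the library code below). A minimal sketch of a helper that would factor it out; this is hypothetical, not part of the PR, and the name select_device is illustrative:

    import os

    import torch


    def select_device() -> torch.device:
        """Prefer CUDA, then MPS (unless HAYSTACK_MPS_ENABLED=false), then CPU."""
        if torch.cuda.is_available():
            return torch.device("cuda")
        if (
            hasattr(torch.backends, "mps")  # older torch builds lack this attribute
            and torch.backends.mps.is_available()
            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
        ):
            return torch.device("mps")
        return torch.device("cpu")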
7 changes: 6 additions & 1 deletion haystack/environment.py
@@ -106,11 +106,16 @@ def collect_static_system_specs() -> Dict[str, Any]:

     try:
         torch_import.check()
+        has_mps = (
+            hasattr(torch.backends, "mps")
+            and torch.backends.mps.is_available()
+            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+        )
         specs.update(
             {
                 "libraries.torch": torch.__version__,
                 "libraries.cuda": torch.version.cuda if torch.cuda.is_available() else False,
-                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
             }
         )
     except ImportError:
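Aside: the new "hardware.gpus" value chains two conditional expressions. Python groups these right to left, so the expression is equivalent to the explicitly parenthesized form below; the snippet reuses the PR's own check and is runnable as-is:

    import os

    import torch

    has_mps = (
        hasattr(torch.backends, "mps")
        and torch.backends.mps.is_available()
        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
    )
    # Same as: device_count() if cuda else (1 if has_mps else 0)
    gpus = torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0
    print(gpus)  # 0 on plain CPU, 1 on Apple Silicon with MPS, N on an N-GPU CUDA box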
8 changes: 8 additions & 0 deletions haystack/modeling/utils.py
@@ -112,6 +112,13 @@ def initialize_device_settings(
             else:
                 devices_to_use = [torch.device("cuda:0")]
                 n_gpu = 1
+        elif (
+            hasattr(torch.backends, "mps")
+            and torch.backends.mps.is_available()
+            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+        ):
+            devices_to_use = [torch.device("mps")]
+            n_gpu = 1
         else:
             devices_to_use = [torch.device("cpu")]
             n_gpu = 0
@@ -180,6 +187,7 @@ def all_gather_list(data, group=None, max_size=16384):
     data (Any): data from the local worker to be gathered on other workers
     group (optional): group of the collective
     """
+    # pylint: disable=all
     SIZE_STORAGE_BYTES = 4  # int32 to encode the payload size

     enc = pickle.dumps(data)
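With this change, initialize_device_settings reports one logical GPU on Apple Silicon. A usage sketch, assuming (as the surrounding code suggests) that the function is callable without arguments and returns a (devices, n_gpu) tuple:

    from haystack.modeling.utils import initialize_device_settings

    # On an M1 Mac without CUDA and with HAYSTACK_MPS_ENABLED unset:
    devices, n_gpu = initialize_device_settings()
    print(devices, n_gpu)  # expected: [device(type='mps')] 1
    # Exporting HAYSTACK_MPS_ENABLED=false before this call restores the CPU path.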
7 changes: 7 additions & 0 deletions haystack/preview/components/readers/extractive.py
@@ -2,6 +2,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 import math
 import warnings
+import os

 from haystack.preview import component, default_to_dict, ComponentError, Document, ExtractedAnswer
 from haystack.preview.lazy_imports import LazyImport
@@ -111,6 +112,12 @@ def warm_up(self):
         if self.model is None:
             if torch.cuda.is_available():
                 self.device = self.device or "cuda:0"
+            elif (
+                hasattr(torch.backends, "mps")
+                and torch.backends.mps.is_available()
+                and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+            ):
+                self.device = self.device or "mps:0"
             else:
                 self.device = self.device or "cpu:0"
             self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name_or_path, token=self.token).to(
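A sketch of how the new warm_up() fallback plays out for a user, assuming ExtractiveReader is importable from haystack.preview.components.readers and that model_name_or_path and device are constructor arguments, as the attribute names in the diff suggest:

    from haystack.preview.components.readers import ExtractiveReader

    reader = ExtractiveReader(model_name_or_path="deepset/roberta-base-squad2")
    reader.warm_up()
    print(reader.device)  # "mps:0" on Apple Silicon with MPS enabled, else "cuda:0" or "cpu:0"
    # Passing device="cpu:0" explicitly, or exporting HAYSTACK_MPS_ENABLED=false,
    # bypasses the auto-detection entirely.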
9 changes: 7 additions & 2 deletions haystack/utils/experiment_tracking.py
@@ -17,7 +17,7 @@
 import transformers

 with LazyImport("Run 'pip install farm-haystack[metrics]'") as mlflow_import:
-    import mlflow
+    import mlflow  # pylint: disable=import-error


logger = logging.getLogger(__name__)
@@ -236,6 +241,11 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
     from haystack.telemetry import HAYSTACK_EXECUTION_CONTEXT

     global env_meta_data  # pylint: disable=global-statement
+    has_mps = (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    )
     if not env_meta_data:
         env_meta_data = {
             "os_version": platform.release(),
@@ -246,7 +251,7 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
"transformers_version": transformers.__version__,
"torch_version": torch.__version__,
"torch_cuda_version": torch.version.cuda if torch.cuda.is_available() else 0,
"n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 0,
"n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
"n_cpu": os.cpu_count(),
"context": os.environ.get(HAYSTACK_EXECUTION_CONTEXT),
"execution_env": _get_execution_environment(),
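Note: this mirrors the collect_static_system_specs() change in haystack/environment.py above; telemetry metadata now reports n_gpu as 1 rather than 0 on MPS-capable machines, making Apple Silicon GPUs visible in usage statistics.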
7 changes: 7 additions & 0 deletions haystack/utils/torch_utils.py
@@ -1,4 +1,5 @@
 from typing import Optional, List, Union
+import os

 import torch
 from torch.utils.data import Dataset
@@ -44,4 +45,10 @@ def get_devices(devices: Optional[List[Union[str, torch.device]]]) -> List[torch.device]:
         return [torch.device(device) for device in devices]
     elif torch.cuda.is_available():
         return [torch.device(device) for device in range(torch.cuda.device_count())]
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        return [torch.device("mps")]
     return [torch.device("cpu")]
@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Added support for Apple Silicon GPU acceleration through PyTorch's "mps" backend, enabling better performance on Apple M1 hardware.
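Operationally, MPS is now on by default wherever PyTorch reports it available; the single opt-out is the environment variable checked throughout this PR. A minimal sketch of disabling it from user code (it must be set before Haystack performs device detection):

    import os

    os.environ["HAYSTACK_MPS_ENABLED"] = "false"  # or: export HAYSTACK_MPS_ENABLED=false in the shell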