Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenCL: work-around for double precision GPU on Apple-silicon #4198

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/silx/opencl/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
__contact__ = "[email protected]"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "22/03/2024"
__date__ = "20/11/2024"
__status__ = "stable"

import os
Expand Down Expand Up @@ -321,7 +321,11 @@ def _measure_workgroup_size(device_or_context, fast=False):
data = numpy.random.random(shape).astype(numpy.float32)
d_data = pyopencl.array.to_device(queue, data)
d_data_1 = pyopencl.array.empty_like(d_data)
d_data_1.fill(numpy.float32(1.0))
try:
d_data_1.fill(numpy.float32(1.0))
except Exception as err:
logger.error("Unable to execute any element-wise kernel! %s: %s", type(err), err)
return max_valid_wg

program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
if fast:
Expand Down
33 changes: 29 additions & 4 deletions src/silx/opencl/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
__contact__ = "[email protected]"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "09/11/2022"
__date__ = "20/11/2024"
__status__ = "stable"

import sys
Expand Down Expand Up @@ -153,6 +153,7 @@ def __init__(
"""
self.sem = threading.Semaphore()
self._X87_VOLATILE = None
self._APPLE_GPU = None
self.profile = None
self.events = [] # List of EventDescription, kept for profiling
self.cl_mem = {} # dict with all buffer allocated
Expand Down Expand Up @@ -488,7 +489,9 @@ def transfer_to_texture(self, arr, tex_ref):

@property
def x87_volatile_option(self):
# this is running 32 bits OpenCL with POCL
"""This enforces float32/64 operations to actually be performed with 32/64-bit precision,
and not with the larger (80-bit) precision the x87 unit is capable of. This unit is used only
with the POCL driver when running on a 32-bit x86 platform"""
if self._X87_VOLATILE is None:
if (
platform.machine() in ("i386", "i686", "x86_64", "AMD64")
Expand All @@ -500,15 +503,37 @@ def x87_volatile_option(self):
self._X87_VOLATILE = ""
return self._X87_VOLATILE

def get_compiler_options(self, x87_volatile=False):
@property
def apple_gpu_option(self):
    """Compiler flag forcing the `cl_khr_fp64` preprocessor variable to 0 or 1.

    The Apple GPU driver wrongly pre-defines `cl_khr_fp64` as `1` although it
    has no double precision support, which crashes the compilation. The list
    of extensions reported by the device is apparently correct, so it is used
    to re-define the variable.

    The flag is only built on Apple-silicon (arm64 + Darwin) when the first
    device of the context is a GPU; everywhere else it is an empty string.
    The result is cached in `self._APPLE_GPU`.

    :return: string with the compiler option, possibly empty
    """
    if self._APPLE_GPU is not None:
        # Already evaluated once: reuse the cached answer
        return self._APPLE_GPU

    option = ""
    if platform.machine() == "arm64" and platform.system() == "Darwin":
        # Only query the OpenCL device once we know we are on Apple-silicon
        device = self.ctx.devices[0]
        if device.type == pyopencl.device_type.GPU:
            has_fp64 = "cl_khr_fp64" in device.extensions
            option = f"-Dcl_khr_fp64={int(has_fp64)}"

    self._APPLE_GPU = option
    return option



def get_compiler_options(self, x87_volatile=False, apple_gpu=False):
    """Build the string of default OpenCL compiler options.

    Each work-around is only looked up when it is requested, and options
    which turn out to be empty are left out of the result.

    :param x87_volatile: needed for Kahan summation
    :param apple_gpu: work around for bug on AppleSilicon GPU compiler
    :return: string with compiler option
    """
    candidates = (
        self.x87_volatile_option if x87_volatile else "",
        self.apple_gpu_option if apple_gpu else "",
    )
    # Drop the empty entries so that no stray spaces end up in the result
    return " ".join(filter(None, candidates))


Expand Down