From 6044e4e26f286b6ff2ce9d55c43403459d7726f0 Mon Sep 17 00:00:00 2001 From: Vladislav Zhurba Date: Tue, 21 May 2024 09:01:08 -0700 Subject: [PATCH] Rebase to CTK 12.5 --- README.md | 2 +- cuda/_cuda/ccuda.pxd.in | 30 + cuda/_cuda/ccuda.pyx.in | 183 +++ cuda/_lib/ccudart/ccudart.pxd.in | 1 + cuda/_lib/ccudart/ccudart.pyx.in | 11 + cuda/_lib/utils.pyx.in | 2 + cuda/ccuda.pxd.in | 100 +- cuda/ccuda.pyx.in | 36 + cuda/ccudart.pxd.in | 17 +- cuda/ccudart.pyx.in | 7 + cuda/cuda.pxd.in | 140 +- cuda/cuda.pyx.in | 1406 +++++++++++++++++++-- cuda/cudart.pxd.in | 15 +- cuda/cudart.pyx.in | 283 ++++- cuda/tests/test_interoperability.py | 40 + docs/_sources/install.md.txt | 2 +- docs/_sources/module/cuda.rst.txt | 170 ++- docs/_sources/module/cudart.rst.txt | 22 +- docs/_sources/release.md.txt | 1 + docs/_sources/release/12.2.1-notes.md.txt | 2 +- docs/_sources/release/12.4.0-notes.md.txt | 2 +- docs/_sources/release/12.5.0-notes.md.txt | 34 + docs/_static/documentation_options.js | 2 +- docs/api.html | 7 +- docs/conduct.html | 7 +- docs/contribute.html | 7 +- docs/genindex.html | 179 ++- docs/index.html | 8 +- docs/install.html | 9 +- docs/module/cuda.html | 1362 +++++++++++++++++--- docs/module/cudart.html | 257 +++- docs/module/nvrtc.html | 7 +- docs/motivation.html | 7 +- docs/objects.inv | Bin 30173 -> 30731 bytes docs/overview.html | 9 +- docs/release.html | 21 +- docs/release/11.4.0-notes.html | 7 +- docs/release/11.5.0-notes.html | 7 +- docs/release/11.6.0-notes.html | 7 +- docs/release/11.6.1-notes.html | 7 +- docs/release/11.7.0-notes.html | 7 +- docs/release/11.7.1-notes.html | 7 +- docs/release/11.8.0-notes.html | 7 +- docs/release/11.8.1-notes.html | 7 +- docs/release/11.8.2-notes.html | 7 +- docs/release/11.8.3-notes.html | 7 +- docs/release/12.0.0-notes.html | 7 +- docs/release/12.1.0-notes.html | 7 +- docs/release/12.2.0-notes.html | 7 +- docs/release/12.2.1-notes.html | 9 +- docs/release/12.3.0-notes.html | 7 +- docs/release/12.4.0-notes.html | 13 +- docs/release/12.5.0-notes.html | 357 ++++++ docs/search.html | 7 +- docs/searchindex.js | 2 +- docs_src/source/conf.py | 2 +- docs_src/source/install.md | 2 +- docs_src/source/module/cuda.rst | 170 ++- docs_src/source/module/cudart.rst | 22 +- docs_src/source/release.md | 1 + docs_src/source/release/12.5.0-notes.md | 34 + pyproject.toml | 1 + 62 files changed, 4624 insertions(+), 482 deletions(-) create mode 100644 docs/_sources/release/12.5.0-notes.md.txt create mode 100644 docs/release/12.5.0-notes.html create mode 100644 docs_src/source/release/12.5.0-notes.md diff --git a/README.md b/README.md index c6ae3ce5..7a8043a0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ There're differences in each of these options that are described further in [Ins CUDA Python is supported on all platforms that CUDA is supported. Specific dependencies are as follows: * Driver: Linux (450.80.02 or later) Windows (456.38 or later) -* CUDA Toolkit 12.0 to 12.4 +* CUDA Toolkit 12.0 to 12.5 Only the NVRTC redistributable component is required from the CUDA Toolkit. [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) Installation Guides can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PYPI, Conda or Local Installer. 
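A quick way to confirm an environment matches the driver and toolkit range stated above is to query the driver and runtime versions through the existing `cuDriverGetVersion`/`cudaRuntimeGetVersion` bindings; this is only an illustrative sketch (it assumes cuda-python is installed and importable as `cuda.cuda`/`cuda.cudart`), and version values are encoded as 1000*major + 10*minor, so CUDA 12.5 reports 12050:

```python
# Illustrative check; assumes cuda-python is installed with the
# cuda.cuda and cuda.cudart modules available.
from cuda import cuda, cudart

# Driver version reported by the installed NVIDIA driver.
err, driver_version = cuda.cuDriverGetVersion()
assert err == cuda.CUresult.CUDA_SUCCESS

# Runtime version of the CUDA Runtime the bindings were built against.
err, runtime_version = cudart.cudaRuntimeGetVersion()
assert err == cudart.cudaError_t.cudaSuccess

# Versions use the 1000*major + 10*minor encoding, e.g. 12050 for CUDA 12.5.
print(f"driver: {driver_version}, runtime: {runtime_version}")
```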
diff --git a/cuda/_cuda/ccuda.pxd.in b/cuda/_cuda/ccuda.pxd.in index 2ef88939..82eba6e2 100644 --- a/cuda/_cuda/ccuda.pxd.in +++ b/cuda/_cuda/ccuda.pxd.in @@ -147,6 +147,11 @@ cdef CUresult _cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev) cdef CUresult _cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuCtxCreate_v4' in found_functions}} + +cdef CUresult _cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuCtxDestroy_v2' in found_functions}} cdef CUresult _cuCtxDestroy_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil @@ -237,6 +242,16 @@ cdef CUresult _cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil cdef CUresult _cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuCtxRecordEvent' in found_functions}} + +cdef CUresult _cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + +{{if 'cuCtxWaitEvent' in found_functions}} + +cdef CUresult _cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuCtxAttach' in found_functions}} cdef CUresult _cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil @@ -382,6 +397,11 @@ cdef CUresult _cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUD cdef CUresult _cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuKernelGetLibrary' in found_functions}} + +cdef CUresult _cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuLibraryGetGlobal' in found_functions}} cdef CUresult _cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil @@ -1027,6 +1047,11 @@ cdef CUresult _cuStreamGetId(CUstream hStream, unsigned long long* streamId) exc cdef CUresult _cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuStreamGetCtx_v2' in found_functions}} + +cdef CUresult _cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuStreamWaitEvent' in found_functions}} cdef CUresult _cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil @@ -2152,6 +2177,11 @@ cdef CUresult _cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA cdef CUresult _cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuGreenCtxStreamCreate' in found_functions}} + +cdef CUresult _cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuProfilerStart' in found_functions}} cdef CUresult _cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil diff --git a/cuda/_cuda/ccuda.pyx.in b/cuda/_cuda/ccuda.pyx.in index ed0975c9..64a296f1 100644 --- a/cuda/_cuda/ccuda.pyx.in +++ b/cuda/_cuda/ccuda.pyx.in @@ -44,6 +44,7 @@ cdef bint __cuPythonInit = False {{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}cdef void *__cuDevicePrimaryCtxReset_v2 = NULL{{endif}} {{if 'cuCtxCreate_v2' in found_functions}}cdef void *__cuCtxCreate_v2 
= NULL{{endif}} {{if 'cuCtxCreate_v3' in found_functions}}cdef void *__cuCtxCreate_v3 = NULL{{endif}} +{{if 'cuCtxCreate_v4' in found_functions}}cdef void *__cuCtxCreate_v4 = NULL{{endif}} {{if 'cuCtxDestroy_v2' in found_functions}}cdef void *__cuCtxDestroy_v2 = NULL{{endif}} {{if 'cuCtxPushCurrent_v2' in found_functions}}cdef void *__cuCtxPushCurrent_v2 = NULL{{endif}} {{if 'cuCtxPopCurrent_v2' in found_functions}}cdef void *__cuCtxPopCurrent_v2 = NULL{{endif}} @@ -62,6 +63,8 @@ cdef bint __cuPythonInit = False {{if 'cuCtxGetStreamPriorityRange' in found_functions}}cdef void *__cuCtxGetStreamPriorityRange = NULL{{endif}} {{if 'cuCtxResetPersistingL2Cache' in found_functions}}cdef void *__cuCtxResetPersistingL2Cache = NULL{{endif}} {{if 'cuCtxGetExecAffinity' in found_functions}}cdef void *__cuCtxGetExecAffinity = NULL{{endif}} +{{if 'cuCtxRecordEvent' in found_functions}}cdef void *__cuCtxRecordEvent = NULL{{endif}} +{{if 'cuCtxWaitEvent' in found_functions}}cdef void *__cuCtxWaitEvent = NULL{{endif}} {{if 'cuCtxAttach' in found_functions}}cdef void *__cuCtxAttach = NULL{{endif}} {{if 'cuCtxDetach' in found_functions}}cdef void *__cuCtxDetach = NULL{{endif}} {{if 'cuCtxGetSharedMemConfig' in found_functions}}cdef void *__cuCtxGetSharedMemConfig = NULL{{endif}} @@ -91,6 +94,7 @@ cdef bint __cuPythonInit = False {{if 'cuLibraryEnumerateKernels' in found_functions}}cdef void *__cuLibraryEnumerateKernels = NULL{{endif}} {{if 'cuLibraryGetModule' in found_functions}}cdef void *__cuLibraryGetModule = NULL{{endif}} {{if 'cuKernelGetFunction' in found_functions}}cdef void *__cuKernelGetFunction = NULL{{endif}} +{{if 'cuKernelGetLibrary' in found_functions}}cdef void *__cuKernelGetLibrary = NULL{{endif}} {{if 'cuLibraryGetGlobal' in found_functions}}cdef void *__cuLibraryGetGlobal = NULL{{endif}} {{if 'cuLibraryGetManaged' in found_functions}}cdef void *__cuLibraryGetManaged = NULL{{endif}} {{if 'cuLibraryGetUnifiedFunction' in found_functions}}cdef void *__cuLibraryGetUnifiedFunction = NULL{{endif}} @@ -220,6 +224,7 @@ cdef bint __cuPythonInit = False {{if 'cuStreamGetFlags' in found_functions}}cdef void *__cuStreamGetFlags = NULL{{endif}} {{if 'cuStreamGetId' in found_functions}}cdef void *__cuStreamGetId = NULL{{endif}} {{if 'cuStreamGetCtx' in found_functions}}cdef void *__cuStreamGetCtx = NULL{{endif}} +{{if 'cuStreamGetCtx_v2' in found_functions}}cdef void *__cuStreamGetCtx_v2 = NULL{{endif}} {{if 'cuStreamWaitEvent' in found_functions}}cdef void *__cuStreamWaitEvent = NULL{{endif}} {{if 'cuStreamAddCallback' in found_functions}}cdef void *__cuStreamAddCallback = NULL{{endif}} {{if 'cuStreamBeginCapture_v2' in found_functions}}cdef void *__cuStreamBeginCapture_v2 = NULL{{endif}} @@ -445,6 +450,7 @@ cdef bint __cuPythonInit = False {{if 'cuGreenCtxRecordEvent' in found_functions}}cdef void *__cuGreenCtxRecordEvent = NULL{{endif}} {{if 'cuGreenCtxWaitEvent' in found_functions}}cdef void *__cuGreenCtxWaitEvent = NULL{{endif}} {{if 'cuStreamGetGreenCtx' in found_functions}}cdef void *__cuStreamGetGreenCtx = NULL{{endif}} +{{if 'cuGreenCtxStreamCreate' in found_functions}}cdef void *__cuGreenCtxStreamCreate = NULL{{endif}} {{if 'cuProfilerStart' in found_functions}}cdef void *__cuProfilerStart = NULL{{endif}} {{if 'cuProfilerStop' in found_functions}}cdef void *__cuProfilerStop = NULL{{endif}} {{if True}}cdef void *__cuGraphicsEGLRegisterImage = NULL{{endif}} @@ -704,6 +710,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuStreamGetCtx cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx, 
9020, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL) {{endif}} + {{if 'cuStreamGetCtx_v2' in found_functions}} + global __cuStreamGetCtx_v2 + cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx_v2, 12050, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL) + {{endif}} {{if 'cuStreamWaitEvent' in found_functions}} global __cuStreamWaitEvent cuGetProcAddress('cuStreamWaitEvent', &__cuStreamWaitEvent, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL) @@ -1027,6 +1037,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuStreamGetCtx cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx, 9020, CU_GET_PROC_ADDRESS_DEFAULT, NULL) {{endif}} + {{if 'cuStreamGetCtx_v2' in found_functions}} + global __cuStreamGetCtx_v2 + cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx_v2, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL) + {{endif}} {{if 'cuStreamWaitEvent' in found_functions}} global __cuStreamWaitEvent cuGetProcAddress('cuStreamWaitEvent', &__cuStreamWaitEvent, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL) @@ -1276,6 +1290,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuCtxCreate_v3 cuGetProcAddress('cuCtxCreate', &__cuCtxCreate_v3, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL) {{endif}} + {{if 'cuCtxCreate_v4' in found_functions}} + global __cuCtxCreate_v4 + cuGetProcAddress('cuCtxCreate', &__cuCtxCreate_v4, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL) + {{endif}} {{if 'cuCtxDestroy_v2' in found_functions}} global __cuCtxDestroy_v2 cuGetProcAddress('cuCtxDestroy', &__cuCtxDestroy_v2, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL) @@ -1348,6 +1366,14 @@ cdef int cuPythonInit() except -1 nogil: global __cuCtxGetExecAffinity cuGetProcAddress('cuCtxGetExecAffinity', &__cuCtxGetExecAffinity, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL) {{endif}} + {{if 'cuCtxRecordEvent' in found_functions}} + global __cuCtxRecordEvent + cuGetProcAddress('cuCtxRecordEvent', &__cuCtxRecordEvent, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL) + {{endif}} + {{if 'cuCtxWaitEvent' in found_functions}} + global __cuCtxWaitEvent + cuGetProcAddress('cuCtxWaitEvent', &__cuCtxWaitEvent, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL) + {{endif}} {{if 'cuCtxAttach' in found_functions}} global __cuCtxAttach cuGetProcAddress('cuCtxAttach', &__cuCtxAttach, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL) @@ -1464,6 +1490,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuKernelGetFunction cuGetProcAddress('cuKernelGetFunction', &__cuKernelGetFunction, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL) {{endif}} + {{if 'cuKernelGetLibrary' in found_functions}} + global __cuKernelGetLibrary + cuGetProcAddress('cuKernelGetLibrary', &__cuKernelGetLibrary, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL) + {{endif}} {{if 'cuLibraryGetGlobal' in found_functions}} global __cuLibraryGetGlobal cuGetProcAddress('cuLibraryGetGlobal', &__cuLibraryGetGlobal, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL) @@ -2560,6 +2590,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuStreamGetGreenCtx cuGetProcAddress('cuStreamGetGreenCtx', &__cuStreamGetGreenCtx, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL) {{endif}} + {{if 'cuGreenCtxStreamCreate' in found_functions}} + global __cuGreenCtxStreamCreate + cuGetProcAddress('cuGreenCtxStreamCreate', &__cuGreenCtxStreamCreate, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL) + {{endif}} {{if 'cuProfilerStart' in found_functions}} global __cuProfilerStart cuGetProcAddress('cuProfilerStart', &__cuProfilerStart, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL) @@ -2974,6 +3008,13 @@ cdef int cuPythonInit() except -1 nogil: except: pass {{endif}} + 
{{if 'cuStreamGetCtx_v2' in found_functions}} + try: + global __cuStreamGetCtx_v2 + __cuStreamGetCtx_v2 = win32api.GetProcAddress(handle, 'cuStreamGetCtx_v2_ptsz') + except: + pass + {{endif}} {{if 'cuStreamWaitEvent' in found_functions}} try: global __cuStreamWaitEvent @@ -3537,6 +3578,13 @@ cdef int cuPythonInit() except -1 nogil: except: pass {{endif}} + {{if 'cuStreamGetCtx_v2' in found_functions}} + try: + global __cuStreamGetCtx_v2 + __cuStreamGetCtx_v2 = win32api.GetProcAddress(handle, 'cuStreamGetCtx_v2') + except: + pass + {{endif}} {{if 'cuStreamWaitEvent' in found_functions}} try: global __cuStreamWaitEvent @@ -3972,6 +4020,13 @@ cdef int cuPythonInit() except -1 nogil: except: pass {{endif}} + {{if 'cuCtxCreate_v4' in found_functions}} + try: + global __cuCtxCreate_v4 + __cuCtxCreate_v4 = win32api.GetProcAddress(handle, 'cuCtxCreate_v4') + except: + pass + {{endif}} {{if 'cuCtxDestroy_v2' in found_functions}} try: global __cuCtxDestroy_v2 @@ -4098,6 +4153,20 @@ cdef int cuPythonInit() except -1 nogil: except: pass {{endif}} + {{if 'cuCtxRecordEvent' in found_functions}} + try: + global __cuCtxRecordEvent + __cuCtxRecordEvent = win32api.GetProcAddress(handle, 'cuCtxRecordEvent') + except: + pass + {{endif}} + {{if 'cuCtxWaitEvent' in found_functions}} + try: + global __cuCtxWaitEvent + __cuCtxWaitEvent = win32api.GetProcAddress(handle, 'cuCtxWaitEvent') + except: + pass + {{endif}} {{if 'cuCtxAttach' in found_functions}} try: global __cuCtxAttach @@ -4301,6 +4370,13 @@ cdef int cuPythonInit() except -1 nogil: except: pass {{endif}} + {{if 'cuKernelGetLibrary' in found_functions}} + try: + global __cuKernelGetLibrary + __cuKernelGetLibrary = win32api.GetProcAddress(handle, 'cuKernelGetLibrary') + except: + pass + {{endif}} {{if 'cuLibraryGetGlobal' in found_functions}} try: global __cuLibraryGetGlobal @@ -6219,6 +6295,13 @@ cdef int cuPythonInit() except -1 nogil: except: pass {{endif}} + {{if 'cuGreenCtxStreamCreate' in found_functions}} + try: + global __cuGreenCtxStreamCreate + __cuGreenCtxStreamCreate = win32api.GetProcAddress(handle, 'cuGreenCtxStreamCreate') + except: + pass + {{endif}} {{if 'cuProfilerStart' in found_functions}} try: global __cuProfilerStart @@ -6555,6 +6638,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuStreamGetCtx __cuStreamGetCtx = dlfcn.dlsym(handle, 'cuStreamGetCtx_ptsz') {{endif}} + {{if 'cuStreamGetCtx_v2' in found_functions}} + global __cuStreamGetCtx_v2 + __cuStreamGetCtx_v2 = dlfcn.dlsym(handle, 'cuStreamGetCtx_v2_ptsz') + {{endif}} {{if 'cuStreamWaitEvent' in found_functions}} global __cuStreamWaitEvent __cuStreamWaitEvent = dlfcn.dlsym(handle, 'cuStreamWaitEvent_ptsz') @@ -6878,6 +6965,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuStreamGetCtx __cuStreamGetCtx = dlfcn.dlsym(handle, 'cuStreamGetCtx') {{endif}} + {{if 'cuStreamGetCtx_v2' in found_functions}} + global __cuStreamGetCtx_v2 + __cuStreamGetCtx_v2 = dlfcn.dlsym(handle, 'cuStreamGetCtx_v2') + {{endif}} {{if 'cuStreamWaitEvent' in found_functions}} global __cuStreamWaitEvent __cuStreamWaitEvent = dlfcn.dlsym(handle, 'cuStreamWaitEvent') @@ -7127,6 +7218,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuCtxCreate_v3 __cuCtxCreate_v3 = dlfcn.dlsym(handle, 'cuCtxCreate_v3') {{endif}} + {{if 'cuCtxCreate_v4' in found_functions}} + global __cuCtxCreate_v4 + __cuCtxCreate_v4 = dlfcn.dlsym(handle, 'cuCtxCreate_v4') + {{endif}} {{if 'cuCtxDestroy_v2' in found_functions}} global __cuCtxDestroy_v2 __cuCtxDestroy_v2 = dlfcn.dlsym(handle, 'cuCtxDestroy_v2') @@ -7199,6 
+7294,14 @@ cdef int cuPythonInit() except -1 nogil: global __cuCtxGetExecAffinity __cuCtxGetExecAffinity = dlfcn.dlsym(handle, 'cuCtxGetExecAffinity') {{endif}} + {{if 'cuCtxRecordEvent' in found_functions}} + global __cuCtxRecordEvent + __cuCtxRecordEvent = dlfcn.dlsym(handle, 'cuCtxRecordEvent') + {{endif}} + {{if 'cuCtxWaitEvent' in found_functions}} + global __cuCtxWaitEvent + __cuCtxWaitEvent = dlfcn.dlsym(handle, 'cuCtxWaitEvent') + {{endif}} {{if 'cuCtxAttach' in found_functions}} global __cuCtxAttach __cuCtxAttach = dlfcn.dlsym(handle, 'cuCtxAttach') @@ -7315,6 +7418,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuKernelGetFunction __cuKernelGetFunction = dlfcn.dlsym(handle, 'cuKernelGetFunction') {{endif}} + {{if 'cuKernelGetLibrary' in found_functions}} + global __cuKernelGetLibrary + __cuKernelGetLibrary = dlfcn.dlsym(handle, 'cuKernelGetLibrary') + {{endif}} {{if 'cuLibraryGetGlobal' in found_functions}} global __cuLibraryGetGlobal __cuLibraryGetGlobal = dlfcn.dlsym(handle, 'cuLibraryGetGlobal') @@ -8411,6 +8518,10 @@ cdef int cuPythonInit() except -1 nogil: global __cuStreamGetGreenCtx __cuStreamGetGreenCtx = dlfcn.dlsym(handle, 'cuStreamGetGreenCtx') {{endif}} + {{if 'cuGreenCtxStreamCreate' in found_functions}} + global __cuGreenCtxStreamCreate + __cuGreenCtxStreamCreate = dlfcn.dlsym(handle, 'cuGreenCtxStreamCreate') + {{endif}} {{if 'cuProfilerStart' in found_functions}} global __cuProfilerStart __cuProfilerStart = dlfcn.dlsym(handle, 'cuProfilerStart') @@ -8833,6 +8944,18 @@ cdef CUresult _cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, return err {{endif}} +{{if 'cuCtxCreate_v4' in found_functions}} + +cdef CUresult _cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil: + global __cuCtxCreate_v4 + cuPythonInit() + if __cuCtxCreate_v4 == NULL: + with gil: + raise RuntimeError('Function "cuCtxCreate_v4" not found') + err = ( __cuCtxCreate_v4)(pctx, ctxCreateParams, flags, dev) + return err +{{endif}} + {{if 'cuCtxDestroy_v2' in found_functions}} cdef CUresult _cuCtxDestroy_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -9049,6 +9172,30 @@ cdef CUresult _cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAf return err {{endif}} +{{if 'cuCtxRecordEvent' in found_functions}} + +cdef CUresult _cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil: + global __cuCtxRecordEvent + cuPythonInit() + if __cuCtxRecordEvent == NULL: + with gil: + raise RuntimeError('Function "cuCtxRecordEvent" not found') + err = ( __cuCtxRecordEvent)(hCtx, hEvent) + return err +{{endif}} + +{{if 'cuCtxWaitEvent' in found_functions}} + +cdef CUresult _cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil: + global __cuCtxWaitEvent + cuPythonInit() + if __cuCtxWaitEvent == NULL: + with gil: + raise RuntimeError('Function "cuCtxWaitEvent" not found') + err = ( __cuCtxWaitEvent)(hCtx, hEvent) + return err +{{endif}} + {{if 'cuCtxAttach' in found_functions}} cdef CUresult _cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -9397,6 +9544,18 @@ cdef CUresult _cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?C return err {{endif}} +{{if 'cuKernelGetLibrary' in found_functions}} + +cdef CUresult _cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil: + global __cuKernelGetLibrary + cuPythonInit() + if __cuKernelGetLibrary == NULL: + 
with gil: + raise RuntimeError('Function "cuKernelGetLibrary" not found') + err = ( __cuKernelGetLibrary)(pLib, kernel) + return err +{{endif}} + {{if 'cuLibraryGetGlobal' in found_functions}} cdef CUresult _cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -10945,6 +11104,18 @@ cdef CUresult _cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ER return err {{endif}} +{{if 'cuStreamGetCtx_v2' in found_functions}} + +cdef CUresult _cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil: + global __cuStreamGetCtx_v2 + cuPythonInit() + if __cuStreamGetCtx_v2 == NULL: + with gil: + raise RuntimeError('Function "cuStreamGetCtx_v2" not found') + err = ( __cuStreamGetCtx_v2)(hStream, pCtx, pGreenCtx) + return err +{{endif}} + {{if 'cuStreamWaitEvent' in found_functions}} cdef CUresult _cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -13645,6 +13816,18 @@ cdef CUresult _cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ? return err {{endif}} +{{if 'cuGreenCtxStreamCreate' in found_functions}} + +cdef CUresult _cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil: + global __cuGreenCtxStreamCreate + cuPythonInit() + if __cuGreenCtxStreamCreate == NULL: + with gil: + raise RuntimeError('Function "cuGreenCtxStreamCreate" not found') + err = ( __cuGreenCtxStreamCreate)(phStream, greenCtx, flags, priority) + return err +{{endif}} + {{if 'cuProfilerStart' in found_functions}} cdef CUresult _cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil: diff --git a/cuda/_lib/ccudart/ccudart.pxd.in b/cuda/_lib/ccudart/ccudart.pxd.in index 6d5221a6..072fa91a 100644 --- a/cuda/_lib/ccudart/ccudart.pxd.in +++ b/cuda/_lib/ccudart/ccudart.pxd.in @@ -308,3 +308,4 @@ from libcpp cimport bool {{if True}}cdef cudaError_t _getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} {{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} +{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}} diff --git a/cuda/_lib/ccudart/ccudart.pyx.in b/cuda/_lib/ccudart/ccudart.pyx.in index 4b4352b1..25cb36f2 100644 --- a/cuda/_lib/ccudart/ccudart.pyx.in +++ b/cuda/_lib/ccudart/ccudart.pyx.in @@ -4916,4 +4916,15 @@ cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCal if err != cudaSuccess: _setLastError(err) return err + +{{endif}} +{{if 'cudaGetDriverEntryPointByVersion' in found_functions}} + +cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except 
?cudaErrorCallRequiresNewerDriver nogil: + cdef cudaError_t err = cudaSuccess + err = ccuda._cuGetProcAddress_v2(symbol, funcPtr, cudaVersion, flags, driverStatus) + if err != cudaSuccess: + _setLastError(err) + return err + {{endif}} diff --git a/cuda/_lib/utils.pyx.in b/cuda/_lib/utils.pyx.in index 271edfaa..6f61f90b 100644 --- a/cuda/_lib/utils.pyx.in +++ b/cuda/_lib/utils.pyx.in @@ -142,6 +142,8 @@ cdef class HelperInputVoidPtr: elif isinstance(ptr, (int)): # Easy run, user gave us an already configured void** address self._cptr = ptr + elif isinstance(ptr, (cuda.CUdeviceptr)): + self._cptr = int(ptr) elif PyObject_CheckBuffer(ptr): # Easy run, get address from Python Buffer Protocol err_buffer = PyObject_GetBuffer(ptr, &self._pybuffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS) diff --git a/cuda/ccuda.pxd.in b/cuda/ccuda.pxd.in index 3be050c7..91e051d4 100644 --- a/cuda/ccuda.pxd.in +++ b/cuda/ccuda.pxd.in @@ -116,6 +116,10 @@ cdef extern from "cuda.h": pass ctypedef CUasyncCallbackEntry_st* CUasyncCallbackHandle + cdef struct CUgreenCtx_st: + pass + ctypedef CUgreenCtx_st* CUgreenCtx + cdef struct CUuuid_st: char bytes[16] @@ -359,7 +363,20 @@ cdef extern from "cuda.h": CU_AD_FORMAT_BC6H_SF16 = 156 CU_AD_FORMAT_BC7_UNORM = 157 CU_AD_FORMAT_BC7_UNORM_SRGB = 158 + CU_AD_FORMAT_P010 = 159 + CU_AD_FORMAT_P016 = 161 + CU_AD_FORMAT_NV16 = 162 + CU_AD_FORMAT_P210 = 163 + CU_AD_FORMAT_P216 = 164 + CU_AD_FORMAT_YUY2 = 165 + CU_AD_FORMAT_Y210 = 166 + CU_AD_FORMAT_Y216 = 167 + CU_AD_FORMAT_AYUV = 168 + CU_AD_FORMAT_Y410 = 169 CU_AD_FORMAT_NV12 = 176 + CU_AD_FORMAT_Y416 = 177 + CU_AD_FORMAT_Y444_PLANAR8 = 178 + CU_AD_FORMAT_Y444_PLANAR10 = 179 CU_AD_FORMAT_UNORM_INT8X1 = 192 CU_AD_FORMAT_UNORM_INT8X2 = 193 CU_AD_FORMAT_UNORM_INT8X4 = 194 @@ -372,6 +389,7 @@ cdef extern from "cuda.h": CU_AD_FORMAT_SNORM_INT16X1 = 201 CU_AD_FORMAT_SNORM_INT16X2 = 202 CU_AD_FORMAT_SNORM_INT16X4 = 203 + CU_AD_FORMAT_MAX = 2147483647 ctypedef CUarray_format_enum CUarray_format @@ -530,7 +548,8 @@ cdef extern from "cuda.h": CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132 CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133 CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134 - CU_DEVICE_ATTRIBUTE_MAX = 135 + CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135 + CU_DEVICE_ATTRIBUTE_MAX = 136 ctypedef CUdevice_attribute_enum CUdevice_attribute @@ -778,7 +797,10 @@ cdef extern from "cuda.h": CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4 CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5 CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6 - CU_LIMIT_MAX = 7 + CU_LIMIT_SHMEM_SIZE = 7 + CU_LIMIT_CIG_ENABLED = 8 + CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 9 + CU_LIMIT_MAX = 10 ctypedef CUlimit_enum CUlimit @@ -998,6 +1020,7 @@ cdef extern from "cuda.h": CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10 CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12 CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13 + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14 ctypedef CUlaunchAttributeID_enum CUlaunchAttributeID @@ -1033,6 +1056,7 @@ cdef extern from "cuda.h": CUlaunchMemSyncDomainMap memSyncDomainMap CUlaunchMemSyncDomain memSyncDomain anon_struct4 deviceUpdatableKernelNode + unsigned int sharedMemCarveout ctypedef CUlaunchAttributeValue_union CUlaunchAttributeValue @@ -1120,6 +1144,24 @@ cdef extern from "cuda.h": ctypedef CUexecAffinityParam_v1 CUexecAffinityParam + cdef enum CUcigDataType_enum: + CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = 1 + + ctypedef CUcigDataType_enum CUcigDataType + + cdef struct CUctxCigParam_st: + CUcigDataType sharedDataType + void* sharedData + + ctypedef CUctxCigParam_st 
CUctxCigParam + + cdef struct CUctxCreateParams_st: + CUexecAffinityParam* execAffinityParams + int numExecAffinityParams + CUctxCigParam* cigParams + + ctypedef CUctxCreateParams_st CUctxCreateParams + cdef enum CUlibraryOption_enum: CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = 0 CU_LIBRARY_BINARY_IS_PRESERVED = 1 @@ -2123,13 +2165,19 @@ cdef extern from "cuda.h": CU_COREDUMP_ENABLE_USER_TRIGGER = 4 CU_COREDUMP_FILE = 5 CU_COREDUMP_PIPE = 6 - CU_COREDUMP_MAX = 7 + CU_COREDUMP_GENERATION_FLAGS = 7 + CU_COREDUMP_MAX = 8 ctypedef CUcoredumpSettings_enum CUcoredumpSettings - cdef struct CUgreenCtx_st: - pass - ctypedef CUgreenCtx_st* CUgreenCtx + cdef enum CUCoredumpGenerationFlags: + CU_COREDUMP_DEFAULT_FLAGS = 0 + CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = 1 + CU_COREDUMP_SKIP_GLOBAL_MEMORY = 2 + CU_COREDUMP_SKIP_SHARED_MEMORY = 4 + CU_COREDUMP_SKIP_LOCAL_MEMORY = 8 + CU_COREDUMP_LIGHTWEIGHT_FLAGS = 15 + CU_COREDUMP_SKIP_ABORT = 16 cdef struct CUdevResourceDesc_st: pass @@ -2138,6 +2186,10 @@ cdef extern from "cuda.h": ctypedef enum CUgreenCtxCreate_flags: CU_GREEN_CTX_DEFAULT_STREAM = 1 + ctypedef enum CUdevSmResourceSplit_flags: + CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = 1 + CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = 2 + ctypedef enum CUdevResourceType: CU_DEV_RESOURCE_TYPE_INVALID = 0 CU_DEV_RESOURCE_TYPE_SM = 1 @@ -2504,6 +2556,11 @@ cdef CUresult cuCtxCreate(CUcontext* pctx, unsigned int flags, CUdevice dev) exc cdef CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuCtxCreate_v4' in found_functions}} + +cdef CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuCtxDestroy_v2' in found_functions}} cdef CUresult cuCtxDestroy(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil @@ -2594,6 +2651,16 @@ cdef CUresult cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil cdef CUresult cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuCtxRecordEvent' in found_functions}} + +cdef CUresult cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + +{{if 'cuCtxWaitEvent' in found_functions}} + +cdef CUresult cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuCtxAttach' in found_functions}} cdef CUresult cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil @@ -2739,6 +2806,11 @@ cdef CUresult cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA cdef CUresult cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuKernelGetLibrary' in found_functions}} + +cdef CUresult cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuLibraryGetGlobal' in found_functions}} cdef CUresult cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil @@ -3384,6 +3456,11 @@ cdef CUresult cuStreamGetId(CUstream hStream, unsigned long long* streamId) exce cdef CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuStreamGetCtx_v2' in found_functions}} + +cdef CUresult cuStreamGetCtx_v2(CUstream 
hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuStreamWaitEvent' in found_functions}} cdef CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil @@ -4509,6 +4586,11 @@ cdef CUresult cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ cdef CUresult cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} +{{if 'cuGreenCtxStreamCreate' in found_functions}} + +cdef CUresult cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil +{{endif}} + {{if 'cuProfilerStart' in found_functions}} cdef CUresult cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil @@ -4614,7 +4696,7 @@ cdef CUresult cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResou cdef CUresult cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil {{endif}} -cdef enum: CUDA_VERSION = 12040 +cdef enum: CUDA_VERSION = 12050 cdef enum: CU_IPC_HANDLE_SIZE = 64 @@ -4648,6 +4730,8 @@ cdef enum: CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN = 10 cdef enum: CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13 +cdef enum: CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14 + cdef enum: CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1 cdef enum: CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3 @@ -4710,6 +4794,8 @@ cdef enum: CUDA_ARRAY3D_SPARSE = 64 cdef enum: CUDA_ARRAY3D_DEFERRED_MAPPING = 128 +cdef enum: CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = 256 + cdef enum: CU_TRSA_OVERRIDE_FORMAT = 1 cdef enum: CU_TRSF_READ_AS_INTEGER = 1 diff --git a/cuda/ccuda.pyx.in b/cuda/ccuda.pyx.in index be684e71..48c06b5b 100644 --- a/cuda/ccuda.pyx.in +++ b/cuda/ccuda.pyx.in @@ -175,6 +175,12 @@ cdef CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, return ccuda._cuCtxCreate_v3(pctx, paramsArray, numParams, flags, dev) {{endif}} +{{if 'cuCtxCreate_v4' in found_functions}} + +cdef CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil: + return ccuda._cuCtxCreate_v4(pctx, ctxCreateParams, flags, dev) +{{endif}} + {{if 'cuCtxDestroy_v2' in found_functions}} cdef CUresult cuCtxDestroy(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -283,6 +289,18 @@ cdef CUresult cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAff return ccuda._cuCtxGetExecAffinity(pExecAffinity, typename) {{endif}} +{{if 'cuCtxRecordEvent' in found_functions}} + +cdef CUresult cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil: + return ccuda._cuCtxRecordEvent(hCtx, hEvent) +{{endif}} + +{{if 'cuCtxWaitEvent' in found_functions}} + +cdef CUresult cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil: + return ccuda._cuCtxWaitEvent(hCtx, hEvent) +{{endif}} + {{if 'cuCtxAttach' in found_functions}} cdef CUresult cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -457,6 +475,12 @@ cdef CUresult cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CU return ccuda._cuKernelGetFunction(pFunc, kernel) {{endif}} +{{if 'cuKernelGetLibrary' in found_functions}} + +cdef CUresult cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil: + return 
ccuda._cuKernelGetLibrary(pLib, kernel) +{{endif}} + {{if 'cuLibraryGetGlobal' in found_functions}} cdef CUresult cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -1231,6 +1255,12 @@ cdef CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERR return ccuda._cuStreamGetCtx(hStream, pctx) {{endif}} +{{if 'cuStreamGetCtx_v2' in found_functions}} + +cdef CUresult cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil: + return ccuda._cuStreamGetCtx_v2(hStream, pCtx, pGreenCtx) +{{endif}} + {{if 'cuStreamWaitEvent' in found_functions}} cdef CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil: @@ -2581,6 +2611,12 @@ cdef CUresult cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?C return ccuda._cuStreamGetGreenCtx(hStream, phCtx) {{endif}} +{{if 'cuGreenCtxStreamCreate' in found_functions}} + +cdef CUresult cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil: + return ccuda._cuGreenCtxStreamCreate(phStream, greenCtx, flags, priority) +{{endif}} + {{if 'cuProfilerStart' in found_functions}} cdef CUresult cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil: diff --git a/cuda/ccudart.pxd.in b/cuda/ccudart.pxd.in index 63f9cf68..86e37220 100644 --- a/cuda/ccudart.pxd.in +++ b/cuda/ccudart.pxd.in @@ -766,6 +766,7 @@ cdef extern from "driver_types.h": cudaLaunchAttributeMemSyncDomain = 10 cudaLaunchAttributeLaunchCompletionEvent = 12 cudaLaunchAttributeDeviceUpdatableKernelNode = 13 + cudaLaunchAttributePreferredSharedMemoryCarveout = 14 cdef struct anon_struct20: unsigned int x @@ -799,6 +800,7 @@ cdef extern from "driver_types.h": cudaLaunchMemSyncDomain memSyncDomain anon_struct22 launchCompletionEvent anon_struct23 deviceUpdatableKernelNode + unsigned int sharedMemCarveout cdef struct cudaLaunchAttribute_st: cudaLaunchAttributeID id @@ -1179,7 +1181,8 @@ cdef extern from "driver_types.h": cudaDevAttrReserved132 = 132 cudaDevAttrMpsEnabled = 133 cudaDevAttrHostNumaId = 134 - cudaDevAttrMax = 135 + cudaDevAttrD3D12CigSupported = 135 + cudaDevAttrMax = 136 cdef enum cudaMemPoolAttr: cudaMemPoolReuseFollowEventDependencies = 1 @@ -1213,6 +1216,7 @@ cdef extern from "driver_types.h": cudaMemHandleTypePosixFileDescriptor = 1 cudaMemHandleTypeWin32 = 2 cudaMemHandleTypeWin32Kmt = 4 + cudaMemHandleTypeFabric = 8 cdef enum cudaGraphMemAttributeType: cudaGraphMemAttrUsedMemCurrent = 0 @@ -2979,6 +2983,11 @@ cdef cudaError_t cudaGraphConditionalHandleCreate(cudaGraphConditionalHandle* pH cdef cudaError_t cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil {{endif}} +{{if 'cudaGetDriverEntryPointByVersion' in found_functions}} + +cdef cudaError_t cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil +{{endif}} + {{if 'cudaGetExportTable' in found_functions}} cdef cudaError_t cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil @@ -3260,6 +3269,8 @@ cdef enum: cudaKernelNodeAttributeMemSyncDomainMap = 9 cdef enum: 
cudaKernelNodeAttributeMemSyncDomain = 10 +cdef enum: cudaKernelNodeAttributePreferredSharedMemoryCarveout = 14 + cdef enum: cudaKernelNodeAttributeDeviceUpdatableKernelNode = 13 cdef enum: cudaSurfaceType1D = 1 @@ -3290,8 +3301,8 @@ cdef enum: cudaTextureType2DLayered = 242 cdef enum: cudaTextureTypeCubemapLayered = 252 -cdef enum: CUDART_VERSION = 12040 +cdef enum: CUDART_VERSION = 12050 -cdef enum: __CUDART_API_VERSION = 12040 +cdef enum: __CUDART_API_VERSION = 12050 cdef enum: CUDA_EGL_MAX_PLANES = 3 \ No newline at end of file diff --git a/cuda/ccudart.pyx.in b/cuda/ccudart.pyx.in index 108f3eba..0ffe3b7e 100644 --- a/cuda/ccudart.pyx.in +++ b/cuda/ccudart.pyx.in @@ -2295,6 +2295,13 @@ cdef cudaError_t cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, uns {{endif}} +{{if 'cudaGetDriverEntryPointByVersion' in found_functions}} + +cdef cudaError_t cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil: + return _cudaGetDriverEntryPointByVersion(symbol, funcPtr, cudaVersion, flags, driverStatus) + +{{endif}} + {{if 'cudaGetExportTable' in found_functions}} cdef cudaError_t cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil: diff --git a/cuda/cuda.pxd.in b/cuda/cuda.pxd.in index fba9354e..bf701040 100644 --- a/cuda/cuda.pxd.in +++ b/cuda/cuda.pxd.in @@ -13,7 +13,7 @@ cimport cuda._lib.utils as utils cdef class CUcontext: """ - CUDA context + A regular context handle Methods ------- @@ -365,37 +365,37 @@ cdef class CUasyncCallbackHandle: cdef ccuda.CUasyncCallbackHandle* _ptr {{endif}} -{{if 'CUlinkState' in found_types}} +{{if 'CUgreenCtx' in found_types}} -cdef class CUlinkState: +cdef class CUgreenCtx: """ + A green context handle. This handle can be used safely from only one CPU thread at a time. Created via cuGreenCtxCreate + Methods ------- getPtr() Get memory address of class instance """ - cdef ccuda.CUlinkState __val - cdef ccuda.CUlinkState* _ptr - cdef list _keepalive + cdef ccuda.CUgreenCtx __val + cdef ccuda.CUgreenCtx* _ptr {{endif}} -{{if 'CUgreenCtx' in found_types}} +{{if 'CUlinkState' in found_types}} -cdef class CUgreenCtx: +cdef class CUlinkState: """ - A green context handle. This handle can be used safely from only one CPU thread at a time. Created via cuGreenCtxCreate - Methods ------- getPtr() Get memory address of class instance """ - cdef ccuda.CUgreenCtx __val - cdef ccuda.CUgreenCtx* _ptr + cdef ccuda.CUlinkState __val + cdef ccuda.CUlinkState* _ptr + cdef list _keepalive {{endif}} {{if 'CUdevResourceDesc' in found_types}} @@ -1473,6 +1473,9 @@ cdef class CUlaunchAttributeValue_union: deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. 
Methods ------- @@ -1615,6 +1618,59 @@ cdef class CUexecAffinityParam_st: cdef ccuda.CUexecAffinityParam_st* _ptr cdef anon_union3 _param {{endif}} +{{if 'struct CUctxCigParam_st' in found_types}} + +cdef class CUctxCigParam_st: + """ + CIG Context Create Params + + Attributes + ---------- + sharedDataType : CUcigDataType + + sharedData : Any + + + Methods + ------- + getPtr() + Get memory address of class instance + + """ + cdef ccuda.CUctxCigParam_st __val + cdef ccuda.CUctxCigParam_st* _ptr +{{endif}} +{{if 'struct CUctxCreateParams_st' in found_types}} + +cdef class CUctxCreateParams_st: + """ + Params for creating CUDA context Exactly one of execAffinityParams + and cigParams must be non-NULL. + + Attributes + ---------- + execAffinityParams : CUexecAffinityParam + + numExecAffinityParams : int + + cigParams : CUctxCigParam + + + Methods + ------- + getPtr() + Get memory address of class instance + + """ + cdef ccuda.CUctxCreateParams_st __val + cdef ccuda.CUctxCreateParams_st* _ptr + cdef size_t _execAffinityParams_length + cdef ccuda.CUexecAffinityParam* _execAffinityParams + + cdef size_t _cigParams_length + cdef ccuda.CUctxCigParam* _cigParams + +{{endif}} {{if 'struct CUlibraryHostUniversalFunctionAndDataTable_st' in found_types}} cdef class CUlibraryHostUniversalFunctionAndDataTable_st: @@ -4542,6 +4598,9 @@ cdef class CUlaunchAttributeValue(CUlaunchAttributeValue_union): deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. Methods ------- @@ -4660,6 +4719,9 @@ cdef class CUkernelNodeAttrValue_v1(CUlaunchAttributeValue): deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. Methods ------- @@ -4720,6 +4782,9 @@ cdef class CUkernelNodeAttrValue(CUkernelNodeAttrValue_v1): deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. Methods ------- @@ -4780,6 +4845,9 @@ cdef class CUstreamAttrValue_v1(CUlaunchAttributeValue): deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. Methods ------- @@ -4840,6 +4908,9 @@ cdef class CUstreamAttrValue(CUstreamAttrValue_v1): deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. 
Methods ------- @@ -4921,6 +4992,51 @@ cdef class CUexecAffinityParam(CUexecAffinityParam_v1): param : anon_union3 + Methods + ------- + getPtr() + Get memory address of class instance + + """ + pass +{{endif}} +{{if 'CUctxCigParam' in found_types}} + +cdef class CUctxCigParam(CUctxCigParam_st): + """ + CIG Context Create Params + + Attributes + ---------- + sharedDataType : CUcigDataType + + sharedData : Any + + + Methods + ------- + getPtr() + Get memory address of class instance + + """ + pass +{{endif}} +{{if 'CUctxCreateParams' in found_types}} + +cdef class CUctxCreateParams(CUctxCreateParams_st): + """ + Params for creating CUDA context Exactly one of execAffinityParams + and cigParams must be non-NULL. + + Attributes + ---------- + execAffinityParams : CUexecAffinityParam + + numExecAffinityParams : int + + cigParams : CUctxCigParam + + Methods ------- getPtr() diff --git a/cuda/cuda.pyx.in b/cuda/cuda.pyx.in index 57036017..74f49d97 100644 --- a/cuda/cuda.pyx.in +++ b/cuda/cuda.pyx.in @@ -98,6 +98,8 @@ CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN = ccuda.CU_KERNEL_NODE_ATTRIBUTE_MEM_SY CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = ccuda.CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE +CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = ccuda.CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT + CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = ccuda.CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = ccuda.CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY @@ -249,6 +251,10 @@ CUDA_ARRAY3D_SPARSE = ccuda.CUDA_ARRAY3D_SPARSE #: will allow deferred memory mapping CUDA_ARRAY3D_DEFERRED_MAPPING = ccuda.CUDA_ARRAY3D_DEFERRED_MAPPING +#: This flag indicates that the CUDA array will be used for hardware +#: accelerated video encode/decode operations. +CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = ccuda.CUDA_ARRAY3D_VIDEO_ENCODE_DECODE + #: Override the texref format with a format inferred from the array. 
Flag #: for :py:obj:`~.cuTexRefSetArray()` CU_TRSA_OVERRIDE_FORMAT = ccuda.CU_TRSA_OVERRIDE_FORMAT @@ -807,10 +813,62 @@ class CUarray_format(IntEnum): #: 4 channel unsigned normalized block-compressed (BC7 compression) #: format with sRGB encoding CU_AD_FORMAT_BC7_UNORM_SRGB = ccuda.CUarray_format_enum.CU_AD_FORMAT_BC7_UNORM_SRGB{{endif}} + {{if 'CU_AD_FORMAT_P010' in found_values}} + + #: 10-bit YUV planar format, with 4:2:0 sampling + CU_AD_FORMAT_P010 = ccuda.CUarray_format_enum.CU_AD_FORMAT_P010{{endif}} + {{if 'CU_AD_FORMAT_P016' in found_values}} + + #: 16-bit YUV planar format, with 4:2:0 sampling + CU_AD_FORMAT_P016 = ccuda.CUarray_format_enum.CU_AD_FORMAT_P016{{endif}} + {{if 'CU_AD_FORMAT_NV16' in found_values}} + + #: 8-bit YUV planar format, with 4:2:2 sampling + CU_AD_FORMAT_NV16 = ccuda.CUarray_format_enum.CU_AD_FORMAT_NV16{{endif}} + {{if 'CU_AD_FORMAT_P210' in found_values}} + + #: 10-bit YUV planar format, with 4:2:2 sampling + CU_AD_FORMAT_P210 = ccuda.CUarray_format_enum.CU_AD_FORMAT_P210{{endif}} + {{if 'CU_AD_FORMAT_P216' in found_values}} + + #: 16-bit YUV planar format, with 4:2:2 sampling + CU_AD_FORMAT_P216 = ccuda.CUarray_format_enum.CU_AD_FORMAT_P216{{endif}} + {{if 'CU_AD_FORMAT_YUY2' in found_values}} + + #: 2 channel, 8-bit YUV packed planar format, with 4:2:2 sampling + CU_AD_FORMAT_YUY2 = ccuda.CUarray_format_enum.CU_AD_FORMAT_YUY2{{endif}} + {{if 'CU_AD_FORMAT_Y210' in found_values}} + + #: 2 channel, 10-bit YUV packed planar format, with 4:2:2 sampling + CU_AD_FORMAT_Y210 = ccuda.CUarray_format_enum.CU_AD_FORMAT_Y210{{endif}} + {{if 'CU_AD_FORMAT_Y216' in found_values}} + + #: 2 channel, 16-bit YUV packed planar format, with 4:2:2 sampling + CU_AD_FORMAT_Y216 = ccuda.CUarray_format_enum.CU_AD_FORMAT_Y216{{endif}} + {{if 'CU_AD_FORMAT_AYUV' in found_values}} + + #: 4 channel, 8-bit YUV packed planar format, with 4:4:4 sampling + CU_AD_FORMAT_AYUV = ccuda.CUarray_format_enum.CU_AD_FORMAT_AYUV{{endif}} + {{if 'CU_AD_FORMAT_Y410' in found_values}} + + #: 10-bit YUV packed planar format, with 4:4:4 sampling + CU_AD_FORMAT_Y410 = ccuda.CUarray_format_enum.CU_AD_FORMAT_Y410{{endif}} {{if 'CU_AD_FORMAT_NV12' in found_values}} #: 8-bit YUV planar format, with 4:2:0 sampling CU_AD_FORMAT_NV12 = ccuda.CUarray_format_enum.CU_AD_FORMAT_NV12{{endif}} + {{if 'CU_AD_FORMAT_Y416' in found_values}} + + #: 4 channel, 12-bit YUV packed planar format, with 4:4:4 sampling + CU_AD_FORMAT_Y416 = ccuda.CUarray_format_enum.CU_AD_FORMAT_Y416{{endif}} + {{if 'CU_AD_FORMAT_Y444_PLANAR8' in found_values}} + + #: 3 channel 8-bit YUV planar format, with 4:4:4 sampling + CU_AD_FORMAT_Y444_PLANAR8 = ccuda.CUarray_format_enum.CU_AD_FORMAT_Y444_PLANAR8{{endif}} + {{if 'CU_AD_FORMAT_Y444_PLANAR10' in found_values}} + + #: 3 channel 10-bit YUV planar format, with 4:4:4 sampling + CU_AD_FORMAT_Y444_PLANAR10 = ccuda.CUarray_format_enum.CU_AD_FORMAT_Y444_PLANAR10{{endif}} {{if 'CU_AD_FORMAT_UNORM_INT8X1' in found_values}} #: 1 channel unsigned 8-bit normalized integer @@ -859,6 +917,8 @@ class CUarray_format(IntEnum): #: 4 channel signed 16-bit normalized integer CU_AD_FORMAT_SNORM_INT16X4 = ccuda.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT16X4{{endif}} + {{if 'CU_AD_FORMAT_MAX' in found_values}} + CU_AD_FORMAT_MAX = ccuda.CUarray_format_enum.CU_AD_FORMAT_MAX{{endif}} {{endif}} {{if 'CUaddress_mode_enum' in found_types}} @@ -1487,8 +1547,13 @@ class CUdevice_attribute(IntEnum): #: Device supports unified function pointers. 
CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = ccuda.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS{{endif}} {{if 'CU_DEVICE_ATTRIBUTE_NUMA_CONFIG' in found_values}} + + #: NUMA configuration of a device: value is of type + #: :py:obj:`~.CUdeviceNumaConfig` enum CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = ccuda.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG{{endif}} {{if 'CU_DEVICE_ATTRIBUTE_NUMA_ID' in found_values}} + + #: NUMA node ID of the GPU memory CU_DEVICE_ATTRIBUTE_NUMA_ID = ccuda.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_NUMA_ID{{endif}} {{if 'CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED' in found_values}} @@ -1503,6 +1568,10 @@ class CUdevice_attribute(IntEnum): #: NUMA ID of the host node closest to the device. Returns -1 when #: system does not support NUMA. CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = ccuda.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID{{endif}} + {{if 'CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED' in found_values}} + + #: Device supports CIG with D3D12. + CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = ccuda.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED{{endif}} {{if 'CU_DEVICE_ATTRIBUTE_MAX' in found_values}} CU_DEVICE_ATTRIBUTE_MAX = ccuda.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX{{endif}} {{endif}} @@ -2447,6 +2516,22 @@ class CUlimit(IntEnum): #: A size in bytes for L2 persisting lines cache size CU_LIMIT_PERSISTING_L2_CACHE_SIZE = ccuda.CUlimit_enum.CU_LIMIT_PERSISTING_L2_CACHE_SIZE{{endif}} + {{if 'CU_LIMIT_SHMEM_SIZE' in found_values}} + + #: A maximum size in bytes of shared memory available to CUDA kernels + #: on a CIG context. Can only be queried, cannot be set + CU_LIMIT_SHMEM_SIZE = ccuda.CUlimit_enum.CU_LIMIT_SHMEM_SIZE{{endif}} + {{if 'CU_LIMIT_CIG_ENABLED' in found_values}} + + #: A non-zero value indicates this CUDA context is a CIG-enabled + #: context. Can only be queried, cannot be set + CU_LIMIT_CIG_ENABLED = ccuda.CUlimit_enum.CU_LIMIT_CIG_ENABLED{{endif}} + {{if 'CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED' in found_values}} + + #: When set to a non-zero value, CUDA will fail to launch a kernel on a + #: CIG context, instead of using the fallback path, if the kernel uses + #: more shared memory than available + CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = ccuda.CUlimit_enum.CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED{{endif}} {{if 'CU_LIMIT_MAX' in found_values}} CU_LIMIT_MAX = ccuda.CUlimit_enum.CU_LIMIT_MAX{{endif}} {{endif}} @@ -2857,6 +2942,18 @@ class CUlaunchAttributeID(IntEnum): #: updatable nodes, the graph must be uploaded before it is launched #: again. CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = ccuda.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE{{endif}} + {{if 'CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}} + + #: Valid for launches. On devices where the L1 cache and shared memory + #: use the same hardware resources, setting + #: :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage + #: between 0-100 signals the CUDA driver to set the shared memory + #: carveout preference, in percent of the total shared memory for that + #: kernel launch. This attribute takes precedence over + #: :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This + #: is only a hint, and the CUDA driver can choose a different + #: configuration if required for the launch. 
+ CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = ccuda.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}} {{endif}} {{if 'CUstreamCaptureStatus_enum' in found_types}} @@ -2947,6 +3044,15 @@ class CUexecAffinityType(IntEnum): {{if 'CU_EXEC_AFFINITY_TYPE_MAX' in found_values}} CU_EXEC_AFFINITY_TYPE_MAX = ccuda.CUexecAffinityType_enum.CU_EXEC_AFFINITY_TYPE_MAX{{endif}} {{endif}} +{{if 'CUcigDataType_enum' in found_types}} + +class CUcigDataType(IntEnum): + """ + + """ + {{if 'CIG_DATA_TYPE_D3D12_COMMAND_QUEUE' in found_values}} + CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = ccuda.CUcigDataType_enum.CIG_DATA_TYPE_D3D12_COMMAND_QUEUE{{endif}} +{{endif}} {{if 'CUlibraryOption_enum' in found_types}} class CUlibraryOption(IntEnum): @@ -4423,7 +4529,7 @@ class CUgraphInstantiate_flags(IntEnum): class CUdeviceNumaConfig(IntEnum): """ - + CUDA device NUMA configuration """ {{if 'CU_DEVICE_NUMA_CONFIG_NONE' in found_values}} @@ -4481,9 +4587,32 @@ class CUcoredumpSettings(IntEnum): CU_COREDUMP_FILE = ccuda.CUcoredumpSettings_enum.CU_COREDUMP_FILE{{endif}} {{if 'CU_COREDUMP_PIPE' in found_values}} CU_COREDUMP_PIPE = ccuda.CUcoredumpSettings_enum.CU_COREDUMP_PIPE{{endif}} + {{if 'CU_COREDUMP_GENERATION_FLAGS' in found_values}} + CU_COREDUMP_GENERATION_FLAGS = ccuda.CUcoredumpSettings_enum.CU_COREDUMP_GENERATION_FLAGS{{endif}} {{if 'CU_COREDUMP_MAX' in found_values}} CU_COREDUMP_MAX = ccuda.CUcoredumpSettings_enum.CU_COREDUMP_MAX{{endif}} {{endif}} +{{if 'CUCoredumpGenerationFlags' in found_types}} + +class CUCoredumpGenerationFlags(IntEnum): + """ + Flags for controlling coredump contents + """ + {{if 'CU_COREDUMP_DEFAULT_FLAGS' in found_values}} + CU_COREDUMP_DEFAULT_FLAGS = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_DEFAULT_FLAGS{{endif}} + {{if 'CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES' in found_values}} + CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES{{endif}} + {{if 'CU_COREDUMP_SKIP_GLOBAL_MEMORY' in found_values}} + CU_COREDUMP_SKIP_GLOBAL_MEMORY = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_GLOBAL_MEMORY{{endif}} + {{if 'CU_COREDUMP_SKIP_SHARED_MEMORY' in found_values}} + CU_COREDUMP_SKIP_SHARED_MEMORY = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_SHARED_MEMORY{{endif}} + {{if 'CU_COREDUMP_SKIP_LOCAL_MEMORY' in found_values}} + CU_COREDUMP_SKIP_LOCAL_MEMORY = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_LOCAL_MEMORY{{endif}} + {{if 'CU_COREDUMP_LIGHTWEIGHT_FLAGS' in found_values}} + CU_COREDUMP_LIGHTWEIGHT_FLAGS = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_LIGHTWEIGHT_FLAGS{{endif}} + {{if 'CU_COREDUMP_SKIP_ABORT' in found_values}} + CU_COREDUMP_SKIP_ABORT = ccuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_ABORT{{endif}} +{{endif}} {{if 'CUgreenCtxCreate_flags' in found_types}} class CUgreenCtxCreate_flags(IntEnum): @@ -4495,6 +4624,17 @@ class CUgreenCtxCreate_flags(IntEnum): #: Required. 
Creates a default stream to use inside the green context CU_GREEN_CTX_DEFAULT_STREAM = ccuda.CUgreenCtxCreate_flags.CU_GREEN_CTX_DEFAULT_STREAM{{endif}} {{endif}} +{{if 'CUdevSmResourceSplit_flags' in found_types}} + +class CUdevSmResourceSplit_flags(IntEnum): + """ + + """ + {{if 'CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING' in found_values}} + CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = ccuda.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING{{endif}} + {{if 'CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE' in found_values}} + CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = ccuda.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE{{endif}} +{{endif}} {{if 'CUdevResourceType' in found_types}} class CUdevResourceType(IntEnum): @@ -5452,6 +5592,18 @@ class CUkernelNodeAttrID(IntEnum): #: updatable nodes, the graph must be uploaded before it is launched #: again. CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = ccuda.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE{{endif}} + {{if 'CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}} + + #: Valid for launches. On devices where the L1 cache and shared memory + #: use the same hardware resources, setting + #: :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage + #: between 0-100 signals the CUDA driver to set the shared memory + #: carveout preference, in percent of the total shared memory for that + #: kernel launch. This attribute takes precedence over + #: :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This + #: is only a hint, and the CUDA driver can choose a different + #: configuration if required for the launch. + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = ccuda.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}} {{endif}} {{if 'CUlaunchAttributeID_enum' in found_types}} @@ -5595,6 +5747,18 @@ class CUstreamAttrID(IntEnum): #: updatable nodes, the graph must be uploaded before it is launched #: again. CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = ccuda.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE{{endif}} + {{if 'CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}} + + #: Valid for launches. On devices where the L1 cache and shared memory + #: use the same hardware resources, setting + #: :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage + #: between 0-100 signals the CUDA driver to set the shared memory + #: carveout preference, in percent of the total shared memory for that + #: kernel launch. This attribute takes precedence over + #: :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This + #: is only a hint, and the CUDA driver can choose a different + #: configuration if required for the launch. + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = ccuda.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}} {{endif}} {{if 'CUmemGenericAllocationHandle' in found_types}} @@ -5629,7 +5793,7 @@ cdef class CUmemGenericAllocationHandle: cdef class CUcontext: """ - CUDA context + A regular context handle Methods ------- @@ -6275,11 +6439,13 @@ cdef class CUasyncCallbackHandle: return self._ptr {{endif}} -{{if 'CUlinkState' in found_types}} +{{if 'CUgreenCtx' in found_types}} -cdef class CUlinkState: +cdef class CUgreenCtx: """ + A green context handle. This handle can be used safely from only one CPU thread at a time. 
Created via cuGreenCtxCreate + Methods ------- getPtr() @@ -6289,13 +6455,13 @@ cdef class CUlinkState: def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0): if _ptr == 0: self._ptr = &self.__val - self._ptr[0] = init_value + self._ptr[0] = init_value else: - self._ptr = _ptr + self._ptr = _ptr def __init__(self, *args, **kwargs): - self._keepalive = [] + pass def __repr__(self): - return '' + return '' def __index__(self): return self.__int__() def __int__(self): @@ -6304,13 +6470,11 @@ cdef class CUlinkState: return self._ptr {{endif}} -{{if 'CUgreenCtx' in found_types}} +{{if 'CUlinkState' in found_types}} -cdef class CUgreenCtx: +cdef class CUlinkState: """ - A green context handle. This handle can be used safely from only one CPU thread at a time. Created via cuGreenCtxCreate - Methods ------- getPtr() @@ -6320,13 +6484,13 @@ cdef class CUgreenCtx: def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0): if _ptr == 0: self._ptr = &self.__val - self._ptr[0] = init_value + self._ptr[0] = init_value else: - self._ptr = _ptr + self._ptr = _ptr def __init__(self, *args, **kwargs): - pass + self._keepalive = [] def __repr__(self): - return '' + return '' def __index__(self): return self.__int__() def __int__(self): @@ -9597,6 +9761,9 @@ cdef class CUlaunchAttributeValue_union: deviceUpdatableKernelNode : anon_struct4 Value of launch attribute CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + sharedMemCarveout : unsigned int + Value of launch attribute + CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. Methods ------- @@ -9675,6 +9842,10 @@ cdef class CUlaunchAttributeValue_union: str_list += ['deviceUpdatableKernelNode :\n' + '\n'.join([' ' + line for line in str(self.deviceUpdatableKernelNode).splitlines()])] except ValueError: str_list += ['deviceUpdatableKernelNode : '] + try: + str_list += ['sharedMemCarveout : ' + str(self.sharedMemCarveout)] + except ValueError: + str_list += ['sharedMemCarveout : '] return '\n'.join(str_list) else: return '' @@ -9767,6 +9938,12 @@ cdef class CUlaunchAttributeValue_union: @deviceUpdatableKernelNode.setter def deviceUpdatableKernelNode(self, deviceUpdatableKernelNode not None : anon_struct4): string.memcpy(&self._ptr[0].deviceUpdatableKernelNode, deviceUpdatableKernelNode.getPtr(), sizeof(self._ptr[0].deviceUpdatableKernelNode)) + @property + def sharedMemCarveout(self): + return self._ptr[0].sharedMemCarveout + @sharedMemCarveout.setter + def sharedMemCarveout(self, unsigned int sharedMemCarveout): + self._ptr[0].sharedMemCarveout = sharedMemCarveout {{endif}} {{if 'struct CUlaunchAttribute_st' in found_types}} @@ -10148,6 +10325,167 @@ cdef class CUexecAffinityParam_st: def param(self, param not None : anon_union3): string.memcpy(&self._ptr[0].param, param.getPtr(), sizeof(self._ptr[0].param)) {{endif}} +{{if 'struct CUctxCigParam_st' in found_types}} + +cdef class CUctxCigParam_st: + """ + CIG Context Create Params + + Attributes + ---------- + sharedDataType : CUcigDataType + + sharedData : Any + + + Methods + ------- + getPtr() + Get memory address of class instance + + """ + def __cinit__(self, void_ptr _ptr = 0): + if _ptr == 0: + self._ptr = &self.__val + else: + self._ptr = _ptr + def __init__(self, void_ptr _ptr = 0): + pass + def __dealloc__(self): + pass + def getPtr(self): + return self._ptr + def __repr__(self): + if self._ptr is not NULL: + str_list = [] + try: + str_list += ['sharedDataType : ' + str(self.sharedDataType)] + except ValueError: + str_list += ['sharedDataType : '] + try: + str_list += ['sharedData 
: ' + hex(self.sharedData)] + except ValueError: + str_list += ['sharedData : '] + return '\n'.join(str_list) + else: + return '' + @property + def sharedDataType(self): + return CUcigDataType(self._ptr[0].sharedDataType) + @sharedDataType.setter + def sharedDataType(self, sharedDataType not None : CUcigDataType): + self._ptr[0].sharedDataType = sharedDataType.value + @property + def sharedData(self): + return self._ptr[0].sharedData + @sharedData.setter + def sharedData(self, sharedData): + _csharedData = utils.HelperInputVoidPtr(sharedData) + self._ptr[0].sharedData = _csharedData.cptr +{{endif}} +{{if 'struct CUctxCreateParams_st' in found_types}} + +cdef class CUctxCreateParams_st: + """ + Params for creating CUDA context Exactly one of execAffinityParams + and cigParams must be non-NULL. + + Attributes + ---------- + execAffinityParams : CUexecAffinityParam + + numExecAffinityParams : int + + cigParams : CUctxCigParam + + + Methods + ------- + getPtr() + Get memory address of class instance + + """ + def __cinit__(self, void_ptr _ptr = 0): + if _ptr == 0: + self._ptr = &self.__val + else: + self._ptr = _ptr + def __init__(self, void_ptr _ptr = 0): + pass + def __dealloc__(self): + if self._execAffinityParams is not NULL: + free(self._execAffinityParams) + if self._cigParams is not NULL: + free(self._cigParams) + def getPtr(self): + return self._ptr + def __repr__(self): + if self._ptr is not NULL: + str_list = [] + try: + str_list += ['execAffinityParams : ' + str(self.execAffinityParams)] + except ValueError: + str_list += ['execAffinityParams : '] + try: + str_list += ['numExecAffinityParams : ' + str(self.numExecAffinityParams)] + except ValueError: + str_list += ['numExecAffinityParams : '] + try: + str_list += ['cigParams : ' + str(self.cigParams)] + except ValueError: + str_list += ['cigParams : '] + return '\n'.join(str_list) + else: + return '' + @property + def execAffinityParams(self): + arrs = [self._ptr[0].execAffinityParams + x*sizeof(ccuda.CUexecAffinityParam) for x in range(self._execAffinityParams_length)] + return [CUexecAffinityParam(_ptr=arr) for arr in arrs] + @execAffinityParams.setter + def execAffinityParams(self, val): + if len(val) == 0: + free(self._execAffinityParams) + self._execAffinityParams_length = 0 + self._ptr[0].execAffinityParams = NULL + else: + if self._execAffinityParams_length != len(val): + free(self._execAffinityParams) + self._execAffinityParams = calloc(len(val), sizeof(ccuda.CUexecAffinityParam)) + if self._execAffinityParams is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(ccuda.CUexecAffinityParam))) + self._execAffinityParams_length = len(val) + self._ptr[0].execAffinityParams = self._execAffinityParams + for idx in range(len(val)): + string.memcpy(&self._execAffinityParams[idx], (val[idx])._ptr, sizeof(ccuda.CUexecAffinityParam)) + + @property + def numExecAffinityParams(self): + return self._ptr[0].numExecAffinityParams + @numExecAffinityParams.setter + def numExecAffinityParams(self, int numExecAffinityParams): + self._ptr[0].numExecAffinityParams = numExecAffinityParams + @property + def cigParams(self): + arrs = [self._ptr[0].cigParams + x*sizeof(ccuda.CUctxCigParam) for x in range(self._cigParams_length)] + return [CUctxCigParam(_ptr=arr) for arr in arrs] + @cigParams.setter + def cigParams(self, val): + if len(val) == 0: + free(self._cigParams) + self._cigParams_length = 0 + self._ptr[0].cigParams = NULL + else: + if self._cigParams_length != len(val): + 
free(self._cigParams) + self._cigParams = calloc(len(val), sizeof(ccuda.CUctxCigParam)) + if self._cigParams is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(ccuda.CUctxCigParam))) + self._cigParams_length = len(val) + self._ptr[0].cigParams = self._cigParams + for idx in range(len(val)): + string.memcpy(&self._cigParams[idx], (val[idx])._ptr, sizeof(ccuda.CUctxCigParam)) + +{{endif}} {{if 'struct CUlibraryHostUniversalFunctionAndDataTable_st' in found_types}} cdef class CUlibraryHostUniversalFunctionAndDataTable_st: @@ -17888,6 +18226,15 @@ def cuDeviceGetAttribute(attrib not None : CUdevice_attribute, dev): Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. + - :py:obj:`~.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG`: NUMA configuration of a + device: value is of type :py:obj:`~.CUdeviceNumaConfig` enum + + - :py:obj:`~.CU_DEVICE_ATTRIBUTE_NUMA_ID`: NUMA node ID of the GPU + memory + + - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED`: Device supports + switch multicast and reduction operations. + Parameters ---------- attrib : :py:obj:`~.CUdevice_attribute` @@ -18976,6 +19323,188 @@ def cuCtxCreate_v3(paramsArray : Optional[List[CUexecAffinityParam]], int numPar return (CUresult(err), pctx) {{endif}} +{{if 'cuCtxCreate_v4' in found_functions}} + +@cython.embedsignature(True) +def cuCtxCreate_v4(ctxCreateParams : Optional[CUctxCreateParams], unsigned int flags, dev): + """ Create a CUDA context. + + Creates a new CUDA context and associates it with the calling thread. + The `flags` parameter is described below. The context is created with a + usage count of 1 and the caller of :py:obj:`~.cuCtxCreate()` must call + :py:obj:`~.cuCtxDestroy()` when done using the context. If a context is + already current to the thread, it is supplanted by the newly created + context and may be restored by a subsequent call to + :py:obj:`~.cuCtxPopCurrent()`. + + CUDA context can be created with execution affinity. The type and the + amount of execution resource the context can use is limited by + `paramsArray` and `numExecAffinityParams` in `execAffinity`. The + `paramsArray` is an array of `CUexecAffinityParam` and the + `numExecAffinityParams` describes the size of the paramsArray. If two + `CUexecAffinityParam` in the array have the same type, the latter + execution affinity parameter overrides the former execution affinity + parameter. The supported execution affinity types are: + + - :py:obj:`~.CU_EXEC_AFFINITY_TYPE_SM_COUNT` limits the portion of SMs + that the context can use. The portion of SMs is specified as the + number of SMs via `CUexecAffinitySmCount`. This limit will be + internally rounded up to the next hardware-supported amount. Hence, + it is imperative to query the actual execution affinity of the + context via `cuCtxGetExecAffinity` after context creation. Currently, + this attribute is only supported under Volta+ MPS. + + CUDA context can be created in CIG(CUDA in Graphics) mode by setting /p + cigParams. Hardware support and software support for graphics clients + can be determined using :py:obj:`~.cuDeviceGetAttribute()` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED`. Data from graphics + client is shared with CUDA via the /p sharedData in /pcigParams. For + D3D12, /p sharedData is a ID3D12CommandQueue handle. + + Either /p execAffinityParams or /p cigParams can be set to a non-null + value. Setting both to a non-null value will result in an undefined + behavior. 
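
An illustrative sketch of driving cuCtxCreate_v4 through these bindings with execution affinity only, leaving cigParams NULL as required above. The SM count of 8 and device 0 are assumptions, the field spellings (type, param.smCount.val) follow the CUexecAffinityParam wrapper, and a CUDA 12.5 driver plus Volta+ MPS are assumed for the affinity to take effect:

    from cuda import cuda

    err, = cuda.cuInit(0)
    err, dev = cuda.cuDeviceGet(0)

    # Ask for a context limited to 8 SMs (honored under Volta+ MPS).
    affinity = cuda.CUexecAffinityParam()
    affinity.type = cuda.CUexecAffinityType.CU_EXEC_AFFINITY_TYPE_SM_COUNT
    affinity.param.smCount.val = 8

    params = cuda.CUctxCreateParams()
    params.execAffinityParams = [affinity]
    params.numExecAffinityParams = 1
    # cigParams stays NULL: only one of execAffinityParams / cigParams may be set.

    err, ctx = cuda.cuCtxCreate_v4(params, 0, dev)

The SM count actually granted can then be read back with cuCtxGetExecAffinity, since the request is rounded up to a hardware-supported amount.
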
+ + The three LSBs of the `flags` parameter can be used to control how the + OS thread, which owns the CUDA context at the time of an API call, + interacts with the OS scheduler when waiting for results from the GPU. + Only one of the scheduling flags can be set when creating a context. + + - :py:obj:`~.CU_CTX_SCHED_SPIN`: Instruct CUDA to actively spin when + waiting for results from the GPU. This can decrease latency when + waiting for the GPU, but may lower the performance of CPU threads if + they are performing work in parallel with the CUDA thread. + + - :py:obj:`~.CU_CTX_SCHED_YIELD`: Instruct CUDA to yield its thread + when waiting for results from the GPU. This can increase latency when + waiting for the GPU, but can increase the performance of CPU threads + performing work in parallel with the GPU. + + - :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`: Instruct CUDA to block the + CPU thread on a synchronization primitive when waiting for the GPU to + finish work. + + - :py:obj:`~.CU_CTX_BLOCKING_SYNC`: Instruct CUDA to block the CPU + thread on a synchronization primitive when waiting for the GPU to + finish work. Deprecated: This flag was deprecated as of CUDA 4.0 + and was replaced with :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`. + + - :py:obj:`~.CU_CTX_SCHED_AUTO`: The default value if the `flags` + parameter is zero, uses a heuristic based on the number of active + CUDA contexts in the process `C` and the number of logical processors + in the system `P`. If `C` > `P`, then CUDA will yield to other OS + threads when waiting for the GPU (:py:obj:`~.CU_CTX_SCHED_YIELD`), + otherwise CUDA will not yield while waiting for results and actively + spin on the processor (:py:obj:`~.CU_CTX_SCHED_SPIN`). Additionally, + on Tegra devices, :py:obj:`~.CU_CTX_SCHED_AUTO` uses a heuristic + based on the power profile of the platform and may choose + :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` for low-powered devices. + + - :py:obj:`~.CU_CTX_MAP_HOST`: Instruct CUDA to support mapped pinned + allocations. This flag must be set in order to allocate pinned host + memory that is accessible to the GPU. + + - :py:obj:`~.CU_CTX_LMEM_RESIZE_TO_MAX`: Instruct CUDA to not reduce + local memory after resizing local memory for a kernel. This can + prevent thrashing by local memory allocations when launching many + kernels with high local memory usage at the cost of potentially + increased memory usage. Deprecated: This flag is deprecated and the + behavior enabled by this flag is now the default and cannot be + disabled. Instead, the per-thread stack size can be controlled with + :py:obj:`~.cuCtxSetLimit()`. + + - :py:obj:`~.CU_CTX_COREDUMP_ENABLE`: If GPU coredumps have not been + enabled globally with :py:obj:`~.cuCoredumpSetAttributeGlobal` or + environment variables, this flag can be set during context creation + to instruct CUDA to create a coredump if this context raises an + exception during execution. These environment variables are described + in the CUDA-GDB user guide under the "GPU core dump support" section. + The initial attributes will be taken from the global attributes at + the time of context creation. The other attributes that control + coredump output can be modified by calling + :py:obj:`~.cuCoredumpSetAttribute` from the created context after it + becomes current. This flag is not supported when CUDA context is + created in CIG(CUDA in Graphics) mode. 
+ + - :py:obj:`~.CU_CTX_USER_COREDUMP_ENABLE`: If user-triggered GPU + coredumps have not been enabled globally with + :py:obj:`~.cuCoredumpSetAttributeGlobal` or environment variables, + this flag can be set during context creation to instruct CUDA to + create a coredump if data is written to a certain pipe that is + present in the OS space. These environment variables are described in + the CUDA-GDB user guide under the "GPU core dump support" section. It + is important to note that the pipe name `must` be set with + :py:obj:`~.cuCoredumpSetAttributeGlobal` before creating the context + if this flag is used. Setting this flag implies that + :py:obj:`~.CU_CTX_COREDUMP_ENABLE` is set. The initial attributes + will be taken from the global attributes at the time of context + creation. The other attributes that control coredump output can be + modified by calling :py:obj:`~.cuCoredumpSetAttribute` from the + created context after it becomes current. Setting this flag on any + context creation is equivalent to setting the + :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER` attribute to `true` + globally. This flag is not supported when CUDA context is created in + CIG(CUDA in Graphics) mode. + + - :py:obj:`~.CU_CTX_SYNC_MEMOPS`: Ensures that synchronous memory + operations initiated on this context will always synchronize. See + further documentation in the section titled "API Synchronization + behavior" to learn more about cases when synchronous memory + operations can exhibit asynchronous behavior. + + Context creation will fail with :py:obj:`~.CUDA_ERROR_UNKNOWN` if the + compute mode of the device is :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`. + The function :py:obj:`~.cuDeviceGetAttribute()` can be used with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE` to determine the compute + mode of the device. The `nvidia-smi` tool can be used to set the + compute mode for * devices. Documentation for `nvidia-smi` can be + obtained by passing a -h option to it. + + Context creation will fail with :: CUDA_ERROR_INVALID_VALUE if invalid + parameter was passed by client to create the CUDA context. + + Context creation in CIG mode will fail with + :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` if CIG is not supported by the + device or the driver. Hardware support is available starting Volta. + Software support for graphics clients can be determined using + :py:obj:`~.cuDeviceGetAttribute()` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED`. 
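
A short capability probe for the CIG path described above, assuming device 0 and that the 12.5 CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED value is exposed by these bindings:

    from cuda import cuda

    err, = cuda.cuInit(0)
    err, dev = cuda.cuDeviceGet(0)
    err, cig_ok = cuda.cuDeviceGetAttribute(
        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED, dev)
    # cig_ok is nonzero only when the device and driver accept CIG-mode context creation.
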
+ + Parameters + ---------- + ctxCreateParams : :py:obj:`~.CUctxCreateParams` + Context creation parameters + flags : unsigned int + Context creation flags + dev : :py:obj:`~.CUdevice` + Device to create context on + + Returns + ------- + CUresult + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN` + pctx : :py:obj:`~.CUcontext` + Returned context handle of the new context + + See Also + -------- + :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCoredumpSetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCtxSynchronize` + """ + cdef ccuda.CUdevice cdev + if dev is None: + cdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + cdev = pdev + else: + pdev = int(CUdevice(dev)) + cdev = pdev + cdef CUcontext pctx = CUcontext() + cdef ccuda.CUctxCreateParams* cctxCreateParams_ptr = ctxCreateParams._ptr if ctxCreateParams != None else NULL + err = ccuda.cuCtxCreate_v4(pctx._ptr, cctxCreateParams_ptr, flags, cdev) + return (CUresult(err), pctx) +{{endif}} + {{if 'cuCtxDestroy_v2' in found_functions}} @cython.embedsignature(True) @@ -18990,13 +19519,16 @@ def cuCtxDestroy(ctx): Destroys and cleans up all resources associated with the context. It is the caller's responsibility to ensure that the context or its resources are not accessed or passed in subsequent API calls and doing so will - result in undefined behavior. These resources include CUDA types such - as :py:obj:`~.CUmodule`, :py:obj:`~.CUfunction`, :py:obj:`~.CUstream`, + result in undefined behavior. These resources include CUDA types + :py:obj:`~.CUmodule`, :py:obj:`~.CUfunction`, :py:obj:`~.CUstream`, :py:obj:`~.CUevent`, :py:obj:`~.CUarray`, :py:obj:`~.CUmipmappedArray`, :py:obj:`~.CUtexObject`, :py:obj:`~.CUsurfObject`, :py:obj:`~.CUtexref`, :py:obj:`~.CUsurfref`, :py:obj:`~.CUgraphicsResource`, :py:obj:`~.CUlinkState`, - :py:obj:`~.CUexternalMemory` and :py:obj:`~.CUexternalSemaphore`. + :py:obj:`~.CUexternalMemory` and :py:obj:`~.CUexternalSemaphore`. These + resources also include memory allocations by :py:obj:`~.cuMemAlloc()`, + :py:obj:`~.cuMemAllocHost()`, :py:obj:`~.cuMemAllocManaged()` and + :py:obj:`~.cuMemAllocPitch()`. If `ctx` is current to the calling thread then `ctx` will also be popped from the current thread's context stack (as though @@ -19018,6 +19550,10 @@ def cuCtxDestroy(ctx): See Also -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` + + Notes + ----- + :py:obj:`~.cuCtxDestroy()` will not destroy memory allocations by :py:obj:`~.cuMemCreate()`, :py:obj:`~.cuMemAllocAsync()` and :py:obj:`~.cuMemAllocFromPoolAsync()`. These memory allocations are not associated with any CUDA context and need to be destroyed explicitly. 
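
A minimal lifecycle sketch matching the note above (device 0 assumed, error checking omitted): memory from cuMemAlloc dies with the context, whereas cuMemCreate / cuMemAllocAsync allocations would need explicit cleanup:

    from cuda import cuda

    err, = cuda.cuInit(0)
    err, dev = cuda.cuDeviceGet(0)
    err, ctx = cuda.cuCtxCreate(0, dev)
    err, dptr = cuda.cuMemAlloc(1 << 20)   # owned by ctx, released by cuCtxDestroy
    err, = cuda.cuCtxDestroy(ctx)          # dptr must not be used after this point
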
""" cdef ccuda.CUcontext cctx if ctx is None: @@ -19293,9 +19829,11 @@ def cuCtxGetId(ctx): @cython.embedsignature(True) def cuCtxSynchronize(): - """ Block for a context's tasks to complete. + """ Block for the current context's tasks to complete. - Blocks until the device has completed all preceding requested tasks. + Blocks until the current context has completed all preceding requested + tasks. If the current context is the primary context, green contexts + that have been created will also be synchronized. :py:obj:`~.cuCtxSynchronize()` returns an error if one of the preceding tasks failed. If the context was created with the :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` flag, the CPU thread will block @@ -19723,6 +20261,127 @@ def cuCtxGetExecAffinity(typename not None : CUexecAffinityType): return (CUresult(err), pExecAffinity) {{endif}} +{{if 'cuCtxRecordEvent' in found_functions}} + +@cython.embedsignature(True) +def cuCtxRecordEvent(hCtx, hEvent): + """ Records an event. + + Captures in `hEvent` all the activities of the context `hCtx` at the + time of this call. `hEvent` and `hCtx` must be from the same CUDA + context, otherwise :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` will be + returned. Calls such as :py:obj:`~.cuEventQuery()` or + :py:obj:`~.cuCtxWaitEvent()` will then examine or wait for completion + of the work that was captured. Uses of `hCtx` after this call do not + modify `hEvent`. If the context passed to `hCtx` is the primary + context, `hEvent` will capture all the activities of the primary + context and its green contexts. If the context passed to `hCtx` is a + context converted from green context via + :py:obj:`~.cuCtxFromGreenCtx()`, `hEvent` will capture only the + activities of the green context. + + Parameters + ---------- + hCtx : :py:obj:`~.CUcontext` + Context to record event for + hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t` + Event to record + + Returns + ------- + CUresult + :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` + + See Also + -------- + :py:obj:`~.cuCtxWaitEvent`, :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuEventRecord` + + Notes + ----- + The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` if the specified context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures. + """ + cdef ccuda.CUevent chEvent + if hEvent is None: + chEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + chEvent = phEvent + else: + phEvent = int(CUevent(hEvent)) + chEvent = phEvent + cdef ccuda.CUcontext chCtx + if hCtx is None: + chCtx = 0 + elif isinstance(hCtx, (CUcontext,)): + phCtx = int(hCtx) + chCtx = phCtx + else: + phCtx = int(CUcontext(hCtx)) + chCtx = phCtx + err = ccuda.cuCtxRecordEvent(chCtx, chEvent) + return (CUresult(err),) +{{endif}} + +{{if 'cuCtxWaitEvent' in found_functions}} + +@cython.embedsignature(True) +def cuCtxWaitEvent(hCtx, hEvent): + """ Make a context wait on an event. + + Makes all future work submitted to context `hCtx` wait for all work + captured in `hEvent`. The synchronization will be performed on the + device and will not block the calling CPU thread. See + :py:obj:`~.cuCtxRecordEvent()` for details on what is captured by an + event. 
If the context passed to `hCtx` is the primary context, the + primary context and its green contexts will wait for `hEvent`. If the + context passed to `hCtx` is a context converted from green context via + :py:obj:`~.cuCtxFromGreenCtx()`, the green context will wait for + `hEvent`. + + Parameters + ---------- + hCtx : :py:obj:`~.CUcontext` + Context to wait + hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t` + Event to wait on + + Returns + ------- + CUresult + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` + + See Also + -------- + :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuStreamWaitEvent` + + Notes + ----- + `hEvent` may be from a different context or device than `hCtx`. + + The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence or if the specified context `hCtx` has a stream in the capture mode. + """ + cdef ccuda.CUevent chEvent + if hEvent is None: + chEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + chEvent = phEvent + else: + phEvent = int(CUevent(hEvent)) + chEvent = phEvent + cdef ccuda.CUcontext chCtx + if hCtx is None: + chCtx = 0 + elif isinstance(hCtx, (CUcontext,)): + phCtx = int(hCtx) + chCtx = phCtx + else: + phCtx = int(CUcontext(hCtx)) + chCtx = phCtx + err = ccuda.cuCtxWaitEvent(chCtx, chEvent) + return (CUresult(err),) +{{endif}} + {{if 'cuCtxAttach' in found_functions}} @cython.embedsignature(True) @@ -20731,13 +21390,17 @@ def cuLibraryLoadData(code, jitOptions : Optional[List[CUjit_option]], jitOption Returns ------- CUresult - :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND` + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` library : :py:obj:`~.CUlibrary` Returned library See Also -------- :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx` + + Notes + ----- + If the library contains managed variables and no device in the system supports managed variables this call is expected to return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues libraryOptions = [] if libraryOptions is None else libraryOptions @@ -20820,13 +21483,17 @@ def cuLibraryLoadFromFile(char* fileName, jitOptions : Optional[List[CUjit_optio Returns ------- CUresult - :py:obj:`~.CUDA_SUCCESS`, 
:py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND` + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` library : :py:obj:`~.CUlibrary` Returned library See Also -------- :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx` + + Notes + ----- + If the library contains managed variables and no device in the system supports managed variables this call is expected to return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues libraryOptions = [] if libraryOptions is None else libraryOptions @@ -21093,6 +21760,45 @@ def cuKernelGetFunction(kernel): return (CUresult(err), pFunc) {{endif}} +{{if 'cuKernelGetLibrary' in found_functions}} + +@cython.embedsignature(True) +def cuKernelGetLibrary(kernel): + """ Returns a library handle. + + Returns in `pLib` the handle of the library for the requested kernel + `kernel` + + Parameters + ---------- + kernel : :py:obj:`~.CUkernel` + Kernel to retrieve library handle + + Returns + ------- + CUresult + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND` + pLib : :py:obj:`~.CUlibrary` + Returned library handle + + See Also + -------- + :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetKernel` + """ + cdef ccuda.CUkernel ckernel + if kernel is None: + ckernel = 0 + elif isinstance(kernel, (CUkernel,)): + pkernel = int(kernel) + ckernel = pkernel + else: + pkernel = int(CUkernel(kernel)) + ckernel = pkernel + cdef CUlibrary pLib = CUlibrary() + err = ccuda.cuKernelGetLibrary(pLib._ptr, ckernel) + return (CUresult(err), pLib) +{{endif}} + {{if 'cuLibraryGetGlobal' in found_functions}} @cython.embedsignature(True) @@ -21173,10 +21879,6 @@ def cuLibraryGetManaged(library, char* name): See Also -------- :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload` - - Notes - ----- - The API requires a CUDA context to be present and initialized on at least one device. If no context is present, the call returns :py:obj:`~.CUDA_ERROR_NOT_FOUND`. """ cdef ccuda.CUlibrary clibrary if library is None: @@ -21433,6 +22135,10 @@ def cuKernelSetAttribute(attrib not None : CUfunction_attribute, int val, kernel compile time, it cannot be set at runtime. Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. 
+ - :py:obj:`~.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`: + Indicates whether the function can be launched with non-portable + cluster size. 1 is allowed, 0 is disallowed. + - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`: The block scheduling policy of a function. The value type is CUclusterSchedulingPolicy. @@ -22516,9 +23222,10 @@ def cuIpcGetEventHandle(event): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode Users can test their device - for IPC functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` - with :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` Parameters ---------- @@ -22570,9 +23277,10 @@ def cuIpcOpenEventHandle(handle not None : CUipcEventHandle): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode Users can test their device - for IPC functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` - with :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` Parameters ---------- @@ -22613,9 +23321,10 @@ def cuIpcGetMemHandle(dptr): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode Users can test their device - for IPC functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` - with :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` Parameters ---------- @@ -22681,9 +23390,10 @@ def cuIpcOpenMemHandle(handle not None : CUipcMemHandle, unsigned int Flags): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode Users can test their device - for IPC functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` - with :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` Parameters ---------- @@ -22730,9 +23440,10 @@ def cuIpcCloseMemHandle(dptr): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. 
IPC functionality on - Windows is restricted to GPUs in TCC mode Users can test their device - for IPC functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` - with :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cuapiDeviceGetAttribute` with + :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED` Parameters ---------- @@ -26151,8 +26862,25 @@ def cuMemCreate(size_t size, prop : Optional[CUmemAllocationProp], unsigned long :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must specify the NUMA ID of the CPU. On systems where NUMA is not available - :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must be set to 0. If - :py:obj:`~.CUmemAllocationProp`::allocFlags::usage contains + :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must be set to 0. + + Applications can set + :py:obj:`~.CUmemAllocationProp.requestedHandleTypes` to + :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` in order to create allocations + suitable for sharing within an IMEX domain. An IMEX domain is either an + OS instance or a group of securely connected OS instances using the + NVIDIA IMEX daemon. An IMEX channel is a global resource within the + IMEX domain that represents a logical entity that aims to provide fine + grained accessibility control for the participating processes. When + exporter and importer CUDA processes have been granted access to the + same IMEX channel, they can securely share memory. If the allocating + process does not have access setup for an IMEX channel, attempting to + create a :py:obj:`~.CUmemGenericAllocationHandle` with + :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` will result in + :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. The nvidia-modprobe CLI provides + more information regarding setting up of IMEX channels. + + If :py:obj:`~.CUmemAllocationProp`::allocFlags::usage contains :py:obj:`~.CU_MEM_CREATE_USAGE_TILE_POOL` flag then the memory allocation is intended only to be used as backing tile pool for sparse CUDA arrays and sparse CUDA mipmapped arrays. (see @@ -26702,7 +27430,13 @@ def cuMemImportFromShareableHandle(osHandle, shHandleType not None : CUmemAlloca """ Imports an allocation from a requested shareable handle type. If the current process cannot support the memory described by this - shareable handle, this API will error as CUDA_ERROR_NOT_SUPPORTED. + shareable handle, this API will error as + :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`. + + If `shHandleType` is :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` and the + importer process has not been granted access to the same IMEX channel + as the exporter process, this API will error as + :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. Parameters ---------- @@ -27270,6 +28004,21 @@ def cuMemPoolCreate(poolProps : Optional[CUmemPoolProps]): :py:obj:`~.CUmemPoolProps.maxSize`. If set to 0, the maximum size of the pool will default to a system dependent value. + Applications can set :py:obj:`~.CUmemPoolProps.handleTypes` to + :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` in order to create + :py:obj:`~.CUmemoryPool` suitable for sharing within an IMEX domain. An + IMEX domain is either an OS instance or a group of securely connected + OS instances using the NVIDIA IMEX daemon. 
An IMEX channel is a global + resource within the IMEX domain that represents a logical entity that + aims to provide fine grained accessibility control for the + participating processes. When exporter and importer CUDA processes have + been granted access to the same IMEX channel, they can securely share + memory. If the allocating process does not have access setup for an + IMEX channel, attempting to export a :py:obj:`~.CUmemoryPool` with + :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` will result in + :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. The nvidia-modprobe CLI provides + more information regarding setting up of IMEX channels. + Parameters ---------- poolProps : :py:obj:`~.CUmemPoolProps` @@ -27278,7 +28027,7 @@ def cuMemPoolCreate(poolProps : Optional[CUmemPoolProps]): Returns ------- CUresult - :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` pool : :py:obj:`~.CUmemoryPool` None @@ -27464,6 +28213,11 @@ def cuMemPoolImportFromShareableHandle(handle, handleType not None : CUmemAlloca Specific allocations can be imported from the imported pool with cuMemPoolImportPointer. + If `handleType` is :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` and the + importer process has not been granted access to the same IMEX channel + as the exporter process, this API will error as + :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. + Parameters ---------- handle : Any @@ -27722,12 +28476,12 @@ def cuMulticastBindMem(mcHandle, size_t mcOffset, memHandle, size_t memOffset, s :py:obj:`~.cuMulticastGetGranularity` with the flag :py:obj:`~.CU_MULTICAST_GRANULARITY_RECOMMENDED`. - The `size` + `memOffset` must be smaller than the size of the allocated - memory. Similarly the `size` + `mcOffset` must be smaller than the size - of the multicast object. The memory allocation must have beeen created - on one of the devices that was added to the multicast team via - :py:obj:`~.cuMulticastAddDevice`. Externally shareable as well as - imported multicast objects can be bound only to externally shareable + The `size` + `memOffset` cannot be larger than the size of the + allocated memory. Similarly the `size` + `mcOffset` cannot be larger + than the size of the multicast object. The memory allocation must have + beeen created on one of the devices that was added to the multicast + team via :py:obj:`~.cuMulticastAddDevice`. Externally shareable as well + as imported multicast objects can be bound only to externally shareable memory. Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if there are insufficient resources required to perform the bind. This call may also return CUDA_ERROR_SYSTEM_NOT_READY if the necessary @@ -27796,8 +28550,8 @@ def cuMulticastBindAddr(mcHandle, size_t mcOffset, memptr, size_t size, unsigned returned by :py:obj:`~.cuMulticastGetGranularity` with the flag :py:obj:`~.CU_MULTICAST_GRANULARITY_RECOMMENDED`. - The `size` must be smaller than the size of the allocated memory. - Similarly the `size` + `mcOffset` must be smaller than the total size + The `size` cannot be larger than the size of the allocated memory. + Similarly the `size` + `mcOffset` cannot be larger than the total size of the multicast object. 
The memory allocation must have beeen created on one of the devices that was added to the multicast team via :py:obj:`~.cuMulticastAddDevice`. Externally shareable as well as @@ -27863,7 +28617,7 @@ def cuMulticastUnbind(mcHandle, dev, size_t mcOffset, size_t size): a multiple of the value returned by :py:obj:`~.cuMulticastGetGranularity` flag :py:obj:`~.CU_MULTICAST_GRANULARITY_MINIMUM`. The `size` + `mcOffset` - must be smaller than the total size of the multicast object. + cannot be larger than the total size of the multicast object. Parameters ---------- @@ -29203,7 +29957,7 @@ def cuStreamCreate(unsigned int Flags): See Also -------- - :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` """ cdef CUstream phStream = CUstream() err = ccuda.cuStreamCreate(phStream._ptr, Flags) @@ -29249,7 +30003,7 @@ def cuStreamCreateWithPriority(unsigned int flags, int priority): See Also -------- - :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreateWithPriority` + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreateWithPriority` Notes ----- @@ -29268,8 +30022,9 @@ def cuStreamCreateWithPriority(unsigned int flags, int priority): def cuStreamGetPriority(hStream): """ Query the priority of a given stream. - Query the priority of a stream created using :py:obj:`~.cuStreamCreate` - or :py:obj:`~.cuStreamCreateWithPriority` and return the priority in + Query the priority of a stream created using + :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority` or + :py:obj:`~.cuGreenCtxStreamCreate` and return the priority in `priority`. 
Note that if the stream was created with a priority outside the numerical range returned by :py:obj:`~.cuCtxGetStreamPriorityRange`, this function returns the @@ -29291,7 +30046,7 @@ def cuStreamGetPriority(hStream): See Also -------- - :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cudaStreamGetPriority` + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cudaStreamGetPriority` """ cdef ccuda.CUstream chStream if hStream is None: @@ -29313,8 +30068,9 @@ def cuStreamGetPriority(hStream): def cuStreamGetFlags(hStream): """ Query the flags of a given stream. - Query the flags of a stream created using :py:obj:`~.cuStreamCreate` or - :py:obj:`~.cuStreamCreateWithPriority` and return the flags in `flags`. + Query the flags of a stream created using :py:obj:`~.cuStreamCreate`, + :py:obj:`~.cuStreamCreateWithPriority` or + :py:obj:`~.cuGreenCtxStreamCreate` and return the flags in `flags`. Parameters ---------- @@ -29333,7 +30089,7 @@ def cuStreamGetFlags(hStream): See Also -------- - :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags` + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags` """ cdef ccuda.CUstream chStream if hStream is None: @@ -29412,6 +30168,12 @@ def cuStreamGetCtx(hStream): Returns the CUDA context that the stream is associated with. + Note there is a later version of this API, + :py:obj:`~.cuStreamGetCtx_v2`. It will supplant this version in CUDA + 13.0. It is recommended to use :py:obj:`~.cuStreamGetCtx_v2` till then + as this version will return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` for + streams created via the API :py:obj:`~.cuGreenCtxStreamCreate`. 
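
A sketch of the recommended newer query, using these 12.5 bindings and a matching driver; for a stream created under a regular context the green-context slot comes back as a NULL handle (device 0 assumed):

    from cuda import cuda

    err, = cuda.cuInit(0)
    err, dev = cuda.cuDeviceGet(0)
    err, ctx = cuda.cuCtxCreate(0, dev)
    err, stream = cuda.cuStreamCreate(0)

    err, pctx, pgreen = cuda.cuStreamGetCtx_v2(stream)
    assert int(pgreen) == 0   # not a green-context stream
    # pctx is the regular context the stream belongs to (ctx here)
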
+ The stream handle `hStream` can refer to any of the following: - a stream created via any of the CUDA driver APIs such as @@ -29440,13 +30202,13 @@ def cuStreamGetCtx(hStream): Returns ------- CUresult - :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` pctx : :py:obj:`~.CUcontext` Returned context associated with the stream See Also -------- - :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cuStreamGetCtx_v2`, :py:obj:`~.cudaStreamCreateWithFlags` """ cdef ccuda.CUstream chStream if hStream is None: @@ -29462,6 +30224,82 @@ def cuStreamGetCtx(hStream): return (CUresult(err), pctx) {{endif}} +{{if 'cuStreamGetCtx_v2' in found_functions}} + +@cython.embedsignature(True) +def cuStreamGetCtx_v2(hStream): + """ Query the contexts associated with a stream. + + Returns the contexts that the stream is associated with. + + If the stream is associated with a green context, the API returns the + green context in `pGreenCtx` and the primary context of the associated + device in `pCtx`. + + If the stream is associated with a regular context, the API returns the + regular context in `pCtx` and NULL in `pGreenCtx`. + + The stream handle `hStream` can refer to any of the following: + + - a stream created via any of the CUDA driver APIs such as + :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority` + and :py:obj:`~.cuGreenCtxStreamCreate`, or their runtime API + equivalents such as :py:obj:`~.cudaStreamCreate`, + :py:obj:`~.cudaStreamCreateWithFlags` and + :py:obj:`~.cudaStreamCreateWithPriority`. Passing an invalid handle + will result in undefined behavior. + + - any of the special streams such as the NULL stream, + :py:obj:`~.CU_STREAM_LEGACY` and :py:obj:`~.CU_STREAM_PER_THREAD`. + The runtime API equivalents of these are also accepted, which are + NULL, :py:obj:`~.cudaStreamLegacy` and + :py:obj:`~.cudaStreamPerThread` respectively. If any of the special + handles are specified, the API will operate on the context current to + the calling thread. If a green context (that was converted via + :py:obj:`~.cuCtxFromGreenCtx()` before setting it current) is current + to the calling thread, the API will return the green context in + `pGreenCtx` and the primary context of the associated device in + `pCtx`. If a regular context is current, the API returns the regular + context in `pCtx` and NULL in `pGreenCtx`. Note that specifying + :py:obj:`~.CU_STREAM_PER_THREAD` or :py:obj:`~.cudaStreamPerThread` + will return :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` if a green context + is current to the calling thread. 
If no context is current to the + calling thread, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` is returned. + + Parameters + ---------- + hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t` + Handle to the stream to be queried + + Returns + ------- + CUresult + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` + pCtx : :py:obj:`~.CUcontext` + Returned regular context associated with the stream + pGreenCtx : :py:obj:`~.CUgreenCtx` + Returned green context if the stream is associated with a green + context or NULL if not + + See Also + -------- + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate` :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, + """ + cdef ccuda.CUstream chStream + if hStream is None: + chStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + chStream = phStream + else: + phStream = int(CUstream(hStream)) + chStream = phStream + cdef CUcontext pCtx = CUcontext() + cdef CUgreenCtx pGreenCtx = CUgreenCtx() + err = ccuda.cuStreamGetCtx_v2(chStream, pCtx._ptr, pGreenCtx._ptr) + return (CUresult(err), pCtx, pGreenCtx) +{{endif}} + {{if 'cuStreamWaitEvent' in found_functions}} @cython.embedsignature(True) @@ -30715,11 +31553,12 @@ def cuEventRecord(hEvent, hStream): """ Records an event. Captures in `hEvent` the contents of `hStream` at the time of this - call. `hEvent` and `hStream` must be from the same context. Calls such - as :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuStreamWaitEvent()` will - then examine or wait for completion of the work that was captured. Uses - of `hStream` after this call do not modify `hEvent`. See note on - default stream behavior for what is captured in the default case. + call. `hEvent` and `hStream` must be from the same context otherwise + :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. Calls such as + :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuStreamWaitEvent()` will then + examine or wait for completion of the work that was captured. Uses of + `hStream` after this call do not modify `hEvent`. See note on default + stream behavior for what is captured in the default case. :py:obj:`~.cuEventRecord()` can be called multiple times on the same event and will overwrite the previously captured state. Other APIs such @@ -30775,11 +31614,12 @@ def cuEventRecordWithFlags(hEvent, hStream, unsigned int flags): """ Records an event. Captures in `hEvent` the contents of `hStream` at the time of this - call. `hEvent` and `hStream` must be from the same context. Calls such - as :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuStreamWaitEvent()` will - then examine or wait for completion of the work that was captured. Uses - of `hStream` after this call do not modify `hEvent`. See note on - default stream behavior for what is captured in the default case. + call. `hEvent` and `hStream` must be from the same context otherwise + :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. Calls such as + :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuStreamWaitEvent()` will then + examine or wait for completion of the work that was captured. Uses of + `hStream` after this call do not modify `hEvent`. 
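
A minimal record-then-wait sketch for the behavior described here, ordering stream s2 behind whatever was captured from s1 (device 0 assumed, no real work enqueued):

    from cuda import cuda

    err, = cuda.cuInit(0)
    err, dev = cuda.cuDeviceGet(0)
    err, ctx = cuda.cuCtxCreate(0, dev)
    err, s1 = cuda.cuStreamCreate(0)
    err, s2 = cuda.cuStreamCreate(0)
    err, evt = cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DEFAULT)

    # ... enqueue work on s1 here ...
    err, = cuda.cuEventRecord(evt, s1)          # capture s1's contents at this point
    err, = cuda.cuStreamWaitEvent(s2, evt, 0)   # future s2 work waits on that capture
    err, = cuda.cuEventSynchronize(evt)         # host-side wait on the same capture
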
See note on default + stream behavior for what is captured in the default case. :py:obj:`~.cuEventRecordWithFlags()` can be called multiple times on the same event and will overwrite the previously captured state. Other @@ -32351,6 +33191,10 @@ def cuFuncSetAttribute(hfunc, attrib not None : CUfunction_attribute, int value) compile time, it cannot be set at runtime. Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - :py:obj:`~.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`: + Indicates whether the function can be launched with non-portable + cluster size. 1 is allowed, 0 is disallowed. + - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`: The block scheduling policy of a function. The value type is CUclusterSchedulingPolicy. @@ -33014,13 +33858,6 @@ def cuLaunchCooperativeKernel(f, unsigned int gridDimX, unsigned int gridDimY, u of blocks. Each block contains `blockDimX` x `blockDimY` x `blockDimZ` threads. - Note that the API can also be used to launch context-less kernel - :py:obj:`~.CUkernel` by querying the handle using - :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by - casting to :py:obj:`~.CUfunction`. Here, the context to launch the - kernel on will either be taken from the specified stream `hStream` or - the current context in case of NULL stream. - `sharedMemBytes` sets the amount of dynamic shared memory that will be available to each thread block. @@ -39296,6 +40133,12 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dyna Returns in `*numBlocks` the number of the maximum active blocks per streaming multiprocessor. + Note that the API can also be used with context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will be the current context. + Parameters ---------- func : :py:obj:`~.CUfunction` @@ -39355,6 +40198,12 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, si be found about this feature in the "Unified L1/Texture Cache" section of the Maxwell tuning guide. + Note that the API can also be with launch context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will be the current context. + Parameters ---------- func : :py:obj:`~.CUfunction` @@ -39422,6 +40271,12 @@ def cuOccupancyMaxPotentialBlockSize(func, blockSizeToDynamicSMemSize, size_t dy **View CUDA Toolkit Documentation for a C++ code example** + Note that the API can also be used with context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will be the current context. + Parameters ---------- func : :py:obj:`~.CUfunction` @@ -39499,6 +40354,12 @@ def cuOccupancyMaxPotentialBlockSizeWithFlags(func, blockSizeToDynamicSMemSize, feature in the "Unified L1/Texture Cache" section of the Maxwell tuning guide. + Note that the API can also be used with context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will be the current context. 
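
A hedged sketch of the context-less CUkernel path mentioned in these occupancy notes: the kernel handle is cast to CUfunction and the current context is used for the calculation. The variable ptx (bytes of a compiled module containing a kernel named "kern") and the block size of 128 are illustrative assumptions:

    from cuda import cuda

    err, = cuda.cuInit(0)
    err, dev = cuda.cuDeviceGet(0)
    err, ctx = cuda.cuCtxCreate(0, dev)   # occupancy math runs against the current context

    # ptx: bytes of a compiled module holding a kernel named "kern" (hypothetical input)
    err, library = cuda.cuLibraryLoadData(ptx, [], [], 0, [], [], 0)
    err, kernel = cuda.cuLibraryGetKernel(library, b"kern")

    func = cuda.CUfunction(int(kernel))   # context-less CUkernel cast to a CUfunction
    err, blocks = cuda.cuOccupancyMaxActiveBlocksPerMultiprocessor(func, 128, 0)
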
+ Parameters ---------- func : :py:obj:`~.CUfunction` @@ -39559,6 +40420,12 @@ def cuOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize): Returns in `*dynamicSmemSize` the maximum size of dynamic shared memory to allow `numBlocks` blocks per SM. + Note that the API can also be used with context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will be the current context. + Parameters ---------- func : :py:obj:`~.CUfunction` @@ -39606,6 +40473,13 @@ def cuOccupancyMaxPotentialClusterSize(func, config : Optional[CUlaunchConfig]): This function will respect the compile time launch bounds. + Note that the API can also be used with context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will either be taken from the specified stream + `config->hStream` or the current context in case of NULL stream. + Parameters ---------- func : :py:obj:`~.CUfunction` @@ -39657,6 +40531,13 @@ def cuOccupancyMaxActiveClusters(func, config : Optional[CUlaunchConfig]): schedules the clusters, so the calculated occupancy is not guaranteed to be achievable. + Note that the API can also be used with context-less kernel + :py:obj:`~.CUkernel` by querying the handle using + :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by + casting to :py:obj:`~.CUfunction`. Here, the context to use for + calculations will either be taken from the specified stream + `config->hStream` or the current context in case of NULL stream. + Parameters ---------- func : :py:obj:`~.CUfunction` @@ -41074,7 +41955,8 @@ def cuTexObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC], pTexDesc : Option specifies an alternate format for the data described by `pResDesc`, and also describes the subresource region to restrict access to when texturing. `pResViewDesc` can only be specified if the type of resource - is a CUDA array or a CUDA mipmapped array. + is a CUDA array or a CUDA mipmapped array not in a block compressed + format. Texture objects are only supported on devices of compute capability 3.0 or higher. Additionally, a texture object is an opaque value, and, as @@ -42845,12 +43727,16 @@ def cuCoredumpGetAttribute(attrib not None : CUcoredumpSettings): - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true` means that the host CPU will also create a coredump. The default value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or - or locally. + or locally. This value is deprecated as of CUDA 12.5 - raise the + :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device + abort() if needed. - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true` means that any resulting coredumps will not have a dump of GPU memory or non-reloc ELF images. The default value is :py:obj:`~.false` - unless set to :py:obj:`~.true` globally or locally. + unless set to :py:obj:`~.true` globally or locally. This attribute is + deprecated as of CUDA 12.5, please use + :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead. 
- :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER`: Bool where :py:obj:`~.true` means that a coredump can be created by writing to @@ -42872,6 +43758,38 @@ def cuCoredumpGetAttribute(attrib not None : CUcoredumpSettings): the host name of the machine running the CUDA application and :py:obj:`~.PID` is the process ID of the CUDA application. + - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to + allow granular control the data contained in a coredump specified as + a bitwise OR combination of the following values: + + - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump + generation returns to its default settings of including all memory + regions that it is able to access + + - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump + will not include the data from CUDA source modules that are not + relocated at runtime. + + - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not + include device-side global data that does not belong to any + context. + + - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not + include grid-scale shared memory for the warp that the dumped + kernel belonged to. + + - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not + include local memory from the kernel. + + - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the + above options. Equiavlent to setting the + :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`. + + - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will + not raise an abort() in the host CPU process. Same functional goal + as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the + default behavior. + Parameters ---------- attrib : :py:obj:`~.CUcoredumpSettings` @@ -42923,11 +43841,16 @@ def cuCoredumpGetAttributeGlobal(attrib not None : CUcoredumpSettings): - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true` means that the host CPU will also create a coredump. The default - value is :py:obj:`~.true`. + value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or + or locally. This value is deprecated as of CUDA 12.5 - raise the + :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device + abort() if needed. - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true` means that any resulting coredumps will not have a dump of GPU memory - or non-reloc ELF images. The default value is :py:obj:`~.false`. + or non-reloc ELF images. The default value is :py:obj:`~.false`. This + attribute is deprecated as of CUDA 12.5, please use + :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead. - :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER`: Bool where :py:obj:`~.true` means that a coredump can be created by writing to @@ -42948,6 +43871,38 @@ def cuCoredumpGetAttributeGlobal(attrib not None : CUcoredumpSettings): the host name of the machine running the CUDA application and :py:obj:`~.PID` is the process ID of the CUDA application. + - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to + allow granular control the data contained in a coredump specified as + a bitwise OR combination of the following values: + + - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump + generation returns to its default settings of including all memory + regions that it is able to access + + - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump + will not include the data from CUDA source modules that are not + relocated at runtime. 
+ + - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not + include device-side global data that does not belong to any + context. + + - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not + include grid-scale shared memory for the warp that the dumped + kernel belonged to. + + - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not + include local memory from the kernel. + + - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the + above options. Equiavlent to setting the + :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`. + + - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will + not raise an abort() in the host CPU process. Same functional goal + as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the + default behavior. + Parameters ---------- attrib : :py:obj:`~.CUcoredumpSettings` @@ -42988,8 +43943,8 @@ def cuCoredumpSetAttribute(attrib not None : CUcoredumpSettings, value): An important design decision to note is that any coredump environment variable values set before CUDA initializes will take permanent - precedence over any values set with this this function. This decision - was made to ensure no change in behavior for any users that may be + precedence over any values set with this function. This decision was + made to ensure no change in behavior for any users that may be currently using these variables to get coredumps. `*value` shall contain the requested value specified by `set`. It is up @@ -43018,11 +43973,16 @@ def cuCoredumpSetAttribute(attrib not None : CUcoredumpSettings, value): - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true` means that the host CPU will also create a coredump. The default - value is :py:obj:`~.true`. + value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or + or locally. This value is deprecated as of CUDA 12.5 - raise the + :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device + abort() if needed. - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true` means that any resulting coredumps will not have a dump of GPU memory - or non-reloc ELF images. The default value is :py:obj:`~.false`. + or non-reloc ELF images. The default value is :py:obj:`~.false`. This + attribute is deprecated as of CUDA 12.5, please use + :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead. - :py:obj:`~.CU_COREDUMP_FILE`: String of up to 1023 characters that defines the location where any coredumps generated by this context @@ -43031,6 +43991,38 @@ def cuCoredumpSetAttribute(attrib not None : CUcoredumpSettings, value): host name of the machine running the CUDA applications and :py:obj:`~.PID` is the process ID of the CUDA application. + - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to + allow granular control the data contained in a coredump specified as + a bitwise OR combination of the following values: + + - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump + generation returns to its default settings of including all memory + regions that it is able to access + + - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump + will not include the data from CUDA source modules that are not + relocated at runtime. + + - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not + include device-side global data that does not belong to any + context. 
+ + - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not + include grid-scale shared memory for the warp that the dumped + kernel belonged to. + + - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not + include local memory from the kernel. + + - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the + above options. Equiavlent to setting the + :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`. + + - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will + not raise an abort() in the host CPU process. Same functional goal + as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the + default behavior. + Parameters ---------- attrib : :py:obj:`~.CUcoredumpSettings` @@ -43071,8 +44063,8 @@ def cuCoredumpSetAttributeGlobal(attrib not None : CUcoredumpSettings, value): An important design decision to note is that any coredump environment variable values set before CUDA initializes will take permanent - precedence over any values set with this this function. This decision - was made to ensure no change in behavior for any users that may be + precedence over any values set with this function. This decision was + made to ensure no change in behavior for any users that may be currently using these variables to get coredumps. `*value` shall contain the requested value specified by `set`. It is up @@ -43092,11 +44084,16 @@ def cuCoredumpSetAttributeGlobal(attrib not None : CUcoredumpSettings, value): - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true` means that the host CPU will also create a coredump. The default - value is :py:obj:`~.true`. + value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or + or locally. This value is deprecated as of CUDA 12.5 - raise the + :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device + abort() if needed. - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true` means that any resulting coredumps will not have a dump of GPU memory - or non-reloc ELF images. The default value is :py:obj:`~.false`. + or non-reloc ELF images. The default value is :py:obj:`~.false`. This + attribute is deprecated as of CUDA 12.5, please use + :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead. - :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER`: Bool where :py:obj:`~.true` means that a coredump can be created by writing to @@ -43119,6 +44116,38 @@ def cuCoredumpSetAttributeGlobal(attrib not None : CUcoredumpSettings, value): the host name of the machine running the CUDA application and :py:obj:`~.PID` is the process ID of the CUDA application. + - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to + allow granular control the data contained in a coredump specified as + a bitwise OR combination of the following values: + + - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump + generation returns to its default settings of including all memory + regions that it is able to access + + - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump + will not include the data from CUDA source modules that are not + relocated at runtime. + + - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not + include device-side global data that does not belong to any + context. + + - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not + include grid-scale shared memory for the warp that the dumped + kernel belonged to. + + - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not + include local memory from the kernel. 
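To make the new generation-flag plumbing concrete, a hedged sketch of combining `CUCoredumpGenerationFlags` values follows. The value handling and return shape of the Python setter are assumptions here, so only the status element of the returned tuple is checked.

```python
from cuda import cuda

err, = cuda.cuInit(0)

# Assumed usage: skip host abort() and local memory in future coredumps by
# OR-ing generation flags and handing the integer to the global setter.
flags = (cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_ABORT
         | cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_LOCAL_MEMORY)
ret = cuda.cuCoredumpSetAttributeGlobal(
    cuda.CUcoredumpSettings.CU_COREDUMP_GENERATION_FLAGS, int(flags))
assert ret[0] == cuda.CUresult.CUDA_SUCCESS
```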
+ + - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the + above options. Equiavlent to setting the + :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`. + + - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will + not raise an abort() in the host CPU process. Same functional goal + as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the + default behavior. + Parameters ---------- attrib : :py:obj:`~.CUcoredumpSettings` @@ -43486,25 +44515,40 @@ def cuDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[CUdevRe When creating the groups, the API will take into account the performance and functional characteristics of the input resource, and guarantee a split that will create a disjoint set of symmetrical - partitions. This may lead to less groups created than purely dividing + partitions. This may lead to fewer groups created than purely dividing the total SM count by the `minCount` due to cluster requirements or alignment and granularity requirements for the minCount. - The `remainder` set, might not have the same functional or performance + The `remainder` set does not have the same functional or performance guarantees as the groups in `result`. Its use should be carefully planned and future partitions of the `remainder` set are discouraged. + The following flags are supported: + + - `CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING` : Lower the minimum + SM count and alignment, and treat each SM independent of its + hierarchy. This allows more fine grained partitions but at the cost + of advanced features (such as large clusters on compute capability + 9.0+). + + - `CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE` : Compute + Capability 9.0+ only. Attempt to create groups that may allow for + maximally sized thread clusters. This can be queried post green + context creation using + :py:obj:`~.cuOccupancyMaxPotentialClusterSize`. + A successful API call must either have: - A valid array of `result` pointers of size passed in `nbGroups`, with - `Input` of type `CU_DEV_RESOURCE_TYPE_SM`. Value of `minCount` must - be between 0 and the SM count specified in `input`. `remaining` and - `useFlags` are optional. + `input` of type `CU_DEV_RESOURCE_TYPE_SM`. Value of `minCount` must + be between 0 and the SM count specified in `input`. `remaining` may + be NULL. - NULL passed in for `result`, with a valid integer pointer in - `nbGroups` and `Input` of type `CU_DEV_RESOURCE_TYPE_SM`. Value of + `nbGroups` and `input` of type `CU_DEV_RESOURCE_TYPE_SM`. Value of `minCount` must be between 0 and the SM count specified in `input`. - This queries the number of groups that would be created by the API. + `remaining` may be NULL. This queries the number of groups that would + be created by the API. Note: The API is not supported on 32-bit platforms. @@ -43518,7 +44562,8 @@ def cuDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[CUdevRe `CU_DEV_RESOURCE_TYPE_SM` resource. useFlags : unsigned int Flags specifying how these partitions are used or which constraints - to abide by when splitting the input. + to abide by when splitting the input. Zero is valid for default + behavior. minCount : unsigned int Minimum number of SMs required @@ -43565,12 +44610,23 @@ def cuDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[CUdevRe def cuDevResourceGenerateDesc(resources : Optional[List[CUdevResource]], unsigned int nbResources): """ Generate a resource descriptor. 
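A hedged sketch of the SM split described in the `cuDevSmResourceSplitByCount` hunk above. The return shape of the Python wrapper (result groups, updated group count, remainder) is assumed, and `dev` stands for a `CUdevice` obtained from `cuDeviceGet`.

```python
from cuda import cuda

err, = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)

# Query the device's SM resource, then ask for up to 4 groups of at least
# 8 SMs each. useFlags=0 requests the default split behavior; the
# CU_DEV_SM_RESOURCE_SPLIT_* flags could be passed here instead.
err, sm_resource = cuda.cuDeviceGetDevResource(
    dev, cuda.CUdevResourceType.CU_DEV_RESOURCE_TYPE_SM)
err, groups, nb_groups, remainder = cuda.cuDevSmResourceSplitByCount(
    4, sm_resource, 0, 8)
print("groups created:", nb_groups)
```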
- Generates a resource descriptor with the set of resources specified in - `resources`. The generated resource descriptor is necessary for the - creation of green contexts via the :py:obj:`~.cuGreenCtxCreate` API. - The API expects `nbResources` == 1, as there is only one type of - resource and merging the same types of resource is currently not - supported. + Generates a single resource descriptor with the set of resources + specified in `resources`. The generated resource descriptor is + necessary for the creation of green contexts via the + :py:obj:`~.cuGreenCtxCreate` API. Resources of the same type can be + passed in, provided they meet the requirements as noted below. + + A successful API call must have: + + - A valid output pointer for the `phDesc` descriptor as well as a valid + array of `resources` pointers, with the array size passed in + `nbResources`. If multiple resources are provided in `resources`, the + device they came from must be the same, otherwise + CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION is returned. If multiple + resources are provided in `resources` and they are of type + :py:obj:`~.CU_DEV_RESOURCE_TYPE_SM`, they must be outputs (whether + `result` or `remaining`) from the same split API instance, otherwise + CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION is returned. Note: The API is not supported on 32-bit platforms. @@ -43616,9 +44672,10 @@ def cuDevResourceGenerateDesc(resources : Optional[List[CUdevResource]], unsigne def cuGreenCtxRecordEvent(hCtx, hEvent): """ Records an event. - Captures in \phEvent all the activities of the green context of \phCtx - at the time of this call. \phEvent and \phCtx must be from the same - CUDA context. Calls such as :py:obj:`~.cuEventQuery()` or + Captures in `hEvent` all the activities of the green context of `hCtx` + at the time of this call. `hEvent` and `hCtx` must be from the same + primary context otherwise :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is + returned. Calls such as :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuGreenCtxWaitEvent()` will then examine or wait for completion of the work that was captured. Uses of `hCtx` after this call do not modify `hEvent`. @@ -43633,15 +44690,15 @@ def cuGreenCtxRecordEvent(hCtx, hEvent): Returns ------- CUresult - :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` + :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` See Also -------- - :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuEventRecord` + :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuCtxWaitEvent` Notes ----- - The API will return an error if the specified green context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures. + The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` if the specified green context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures. """ cdef ccuda.CUevent chEvent if hEvent is None: @@ -43671,31 +44728,33 @@ def cuGreenCtxRecordEvent(hCtx, hEvent): def cuGreenCtxWaitEvent(hCtx, hEvent): """ Make a green context wait on an event. 
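Continuing that sketch, a hedged example of turning one split group into a descriptor, creating a green context from it, and recording its activity into an event. `groups` and `dev` are assumed from the split sketch above; the flag and enum names follow the driver-level definitions documented in this patch.

```python
from cuda import cuda

# `groups` and `dev` are assumed from the split sketch above.
err, desc = cuda.cuDevResourceGenerateDesc([groups[0]], 1)
err, green_ctx = cuda.cuGreenCtxCreate(
    desc, dev, cuda.CUgreenCtxCreate_flags.CU_GREEN_CTX_DEFAULT_STREAM)

# Capture the green context's pending work in an event, then wait on it.
err, event = cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DEFAULT)
err, = cuda.cuGreenCtxRecordEvent(green_ctx, event)
err, = cuda.cuGreenCtxWaitEvent(green_ctx, event)
err, = cuda.cuEventDestroy(event)
```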
- Makes all future work submitted to green context \phCtx wait for all - work captured in \phEvent. The synchronization will be performed on the + Makes all future work submitted to green context `hCtx` wait for all + work captured in `hEvent`. The synchronization will be performed on the device and will not block the calling CPU thread. See - :py:obj:`~.cuGreenCtxRecordEvent()` for details on what is captured by - an event. + :py:obj:`~.cuGreenCtxRecordEvent()` or :py:obj:`~.cuEventRecord()`, for + details on what is captured by an event. Parameters ---------- hCtx : :py:obj:`~.CUgreenCtx` Green context to wait hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t` - Event to wait on (may not be NULL) + Event to wait on Returns ------- CUresult - :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` See Also -------- - :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuStreamWaitEvent` + :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuStreamWaitEvent` :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuCtxWaitEvent` Notes ----- - The API will return an error and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence. + `hEvent` may be from a different context or device than `hCtx`. + + The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence or if the specified green context `hCtx` has a stream in the capture mode. """ cdef ccuda.CUevent chEvent if hEvent is None: @@ -43731,8 +44790,12 @@ def cuStreamGetGreenCtx(hStream): The stream handle `hStream` can refer to any of the following: - a stream created via any of the CUDA driver APIs such as - :py:obj:`~.cuStreamCreate`. If during stream creation the context - that was active in the calling thread was obtained with + :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority` + and :py:obj:`~.cuGreenCtxStreamCreate`, or their runtime API + equivalents such as :py:obj:`~.cudaStreamCreate`, + :py:obj:`~.cudaStreamCreateWithFlags` and + :py:obj:`~.cudaStreamCreateWithPriority`. If during stream creation + the context that was active in the calling thread was obtained with cuCtxFromGreenCtx, that green context is returned in `phCtx`. Otherwise, `*phCtx` is set to NULL instead. 
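A small sketch of the stream query described in the `cuStreamGetGreenCtx` hunk; `stream` is assumed to be an existing `CUstream` handle.

```python
from cuda import cuda

# Assumed input: `stream` is an existing CUstream (e.g. from cuStreamCreate
# or cuGreenCtxStreamCreate). A NULL result means the stream does not belong
# to a green context.
err, green_ctx = cuda.cuStreamGetGreenCtx(stream)
if int(green_ctx) == 0:
    print("stream was not created under a green context")
```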
@@ -43757,7 +44820,7 @@ def cuStreamGetGreenCtx(hStream): See Also -------- - :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetCtx_v2`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` """ cdef ccuda.CUstream chStream if hStream is None: @@ -43773,6 +44836,79 @@ def cuStreamGetGreenCtx(hStream): return (CUresult(err), phCtx) {{endif}} +{{if 'cuGreenCtxStreamCreate' in found_functions}} + +@cython.embedsignature(True) +def cuGreenCtxStreamCreate(greenCtx, unsigned int flags, int priority): + """ Create a stream for use in the green context. + + Creates a stream for use in the specified green context `greenCtx` and + returns a handle in `phStream`. The stream can be destroyed by calling + :py:obj:`~.cuStreamDestroy()`. Note that the API ignores the context + that is current to the calling thread and creates a stream in the + specified green context `greenCtx`. + + The supported values for `flags` are: + + - :py:obj:`~.CU_STREAM_NON_BLOCKING`: This must be specified. It + indicates that work running in the created stream may run + concurrently with work in the default stream, and that the created + stream should perform no implicit synchronization with the default + stream. + + Specifying `priority` affects the scheduling priority of work in the + stream. Priorities provide a hint to preferentially run work with + higher priority when possible, but do not preempt already-running work + or provide any other functional guarantee on execution order. + `priority` follows a convention where lower numbers represent higher + priorities. '0' represents default priority. The range of meaningful + numerical priorities can be queried using + :py:obj:`~.cuCtxGetStreamPriorityRange`. If the specified priority is + outside the numerical range returned by + :py:obj:`~.cuCtxGetStreamPriorityRange`, it will automatically be + clamped to the lowest or the highest number in the range. + + Parameters + ---------- + greenCtx : :py:obj:`~.CUgreenCtx` + Green context for which to create the stream for + flags : unsigned int + Flags for stream creation. `CU_STREAM_NON_BLOCKING` must be + specified. + priority : int + Stream priority. Lower numbers represent higher priorities. See + :py:obj:`~.cuCtxGetStreamPriorityRange` for more information about + meaningful stream priorities that can be passed. 
+ + Returns + ------- + CUresult + :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY` + phStream : :py:obj:`~.CUstream` + Returned newly created stream + + See Also + -------- + :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuGreenCtxCreate` :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreateWithPriority` + + Notes + ----- + In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations. + """ + cdef ccuda.CUgreenCtx cgreenCtx + if greenCtx is None: + cgreenCtx = 0 + elif isinstance(greenCtx, (CUgreenCtx,)): + pgreenCtx = int(greenCtx) + cgreenCtx = pgreenCtx + else: + pgreenCtx = int(CUgreenCtx(greenCtx)) + cgreenCtx = pgreenCtx + cdef CUstream phStream = CUstream() + err = ccuda.cuGreenCtxStreamCreate(phStream._ptr, cgreenCtx, flags, priority) + return (CUresult(err), phStream) +{{endif}} + {{if 'cuProfilerStart' in found_functions}} @cython.embedsignature(True) @@ -45053,6 +46189,9 @@ def sizeof(objType): {{if 'CUasyncCallbackHandle' in found_types}} if objType == CUasyncCallbackHandle: return sizeof(ccuda.CUasyncCallbackHandle){{endif}} + {{if 'CUgreenCtx' in found_types}} + if objType == CUgreenCtx: + return sizeof(ccuda.CUgreenCtx){{endif}} {{if 'struct CUuuid_st' in found_types}} if objType == CUuuid_st: return sizeof(ccuda.CUuuid_st){{endif}} @@ -45263,6 +46402,18 @@ def sizeof(objType): {{if 'CUexecAffinityParam' in found_types}} if objType == CUexecAffinityParam: return sizeof(ccuda.CUexecAffinityParam){{endif}} + {{if 'struct CUctxCigParam_st' in found_types}} + if objType == CUctxCigParam_st: + return sizeof(ccuda.CUctxCigParam_st){{endif}} + {{if 'CUctxCigParam' in found_types}} + if objType == CUctxCigParam: + return sizeof(ccuda.CUctxCigParam){{endif}} + {{if 'struct CUctxCreateParams_st' in found_types}} + if objType == CUctxCreateParams_st: + return sizeof(ccuda.CUctxCreateParams_st){{endif}} + {{if 'CUctxCreateParams' in found_types}} + if objType == CUctxCreateParams: + return sizeof(ccuda.CUctxCreateParams){{endif}} {{if 'struct CUlibraryHostUniversalFunctionAndDataTable_st' in found_types}} if objType == CUlibraryHostUniversalFunctionAndDataTable_st: return sizeof(ccuda.CUlibraryHostUniversalFunctionAndDataTable_st){{endif}} @@ -45602,9 +46753,6 @@ def sizeof(objType): {{if 'CUgraphNodeParams' in found_types}} if objType == CUgraphNodeParams: return sizeof(ccuda.CUgraphNodeParams){{endif}} - {{if 'CUgreenCtx' in found_types}} - if objType == CUgreenCtx: - return sizeof(ccuda.CUgreenCtx){{endif}} {{if 'CUdevResourceDesc' in found_types}} if objType == CUdevResourceDesc: return sizeof(ccuda.CUdevResourceDesc){{endif}} diff --git a/cuda/cudart.pxd.in b/cuda/cudart.pxd.in index a848544d..a23c879b 100644 --- a/cuda/cudart.pxd.in +++ b/cuda/cudart.pxd.in @@ -2626,7 +2626,10 @@ cdef class cudaLaunchAttributeValue: Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent. deviceUpdatableKernelNode : anon_struct23 Value of launch attribute - ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + cudaLaunchAttributeDeviceUpdatableKernelNode. 
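As an illustration of the new `cuGreenCtxStreamCreate` binding, a sketch creating a non-blocking, default-priority stream bound to a green context; `green_ctx` is assumed to be a `CUgreenCtx` created earlier with `cuGreenCtxCreate`.

```python
from cuda import cuda

# `green_ctx` is assumed from an earlier cuGreenCtxCreate() call.
# CU_STREAM_NON_BLOCKING is mandatory; priority 0 is the default priority.
err, stream = cuda.cuGreenCtxStreamCreate(
    green_ctx, cuda.CUstream_flags.CU_STREAM_NON_BLOCKING, 0)

# Work submitted to `stream` runs on the green context's SM partition,
# independent of which context is current on this thread.
err, = cuda.cuStreamDestroy(stream)
```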
+ sharedMemCarveout : unsigned int + Value of launch attribute + cudaLaunchAttributePreferredSharedMemoryCarveout. Methods ------- @@ -3159,7 +3162,10 @@ cdef class cudaStreamAttrValue(cudaLaunchAttributeValue): Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent. deviceUpdatableKernelNode : anon_struct23 Value of launch attribute - ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + cudaLaunchAttributeDeviceUpdatableKernelNode. + sharedMemCarveout : unsigned int + Value of launch attribute + cudaLaunchAttributePreferredSharedMemoryCarveout. Methods ------- @@ -3218,7 +3224,10 @@ cdef class cudaKernelNodeAttrValue(cudaLaunchAttributeValue): Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent. deviceUpdatableKernelNode : anon_struct23 Value of launch attribute - ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + cudaLaunchAttributeDeviceUpdatableKernelNode. + sharedMemCarveout : unsigned int + Value of launch attribute + cudaLaunchAttributePreferredSharedMemoryCarveout. Methods ------- diff --git a/cuda/cudart.pyx.in b/cuda/cudart.pyx.in index 75270d60..75583c12 100644 --- a/cuda/cudart.pyx.in +++ b/cuda/cudart.pyx.in @@ -300,6 +300,8 @@ cudaKernelNodeAttributeMemSyncDomainMap = ccudart.cudaKernelNodeAttributeMemSync cudaKernelNodeAttributeMemSyncDomain = ccudart.cudaKernelNodeAttributeMemSyncDomain +cudaKernelNodeAttributePreferredSharedMemoryCarveout = ccudart.cudaKernelNodeAttributePreferredSharedMemoryCarveout + cudaKernelNodeAttributeDeviceUpdatableKernelNode = ccudart.cudaKernelNodeAttributeDeviceUpdatableKernelNode cudaSurfaceType1D = ccudart.cudaSurfaceType1D @@ -1345,6 +1347,18 @@ class cudaLaunchAttributeID(IntEnum): #: updatable nodes, the graph must be uploaded before it is launched #: again. cudaLaunchAttributeDeviceUpdatableKernelNode = ccudart.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode{{endif}} + {{if 'cudaLaunchAttributePreferredSharedMemoryCarveout' in found_values}} + + #: Valid for launches. On devices where the L1 cache and shared memory + #: use the same hardware resources, setting + #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a + #: percentage between 0-100 signals sets the shared memory carveout + #: preference in percent of the total shared memory for that kernel + #: launch. This attribute takes precedence over + #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is + #: only a hint, and the driver can choose a different configuration if + #: required for the launch. + cudaLaunchAttributePreferredSharedMemoryCarveout = ccudart.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout{{endif}} {{endif}} {{if 'cudaAsyncNotificationType_enum' in found_types}} @@ -3395,7 +3409,7 @@ class cudaDeviceAttr(IntEnum): {{if 'cudaDevAttrNumaConfig' in found_values}} #: NUMA configuration of a device: value is of type - #: cudaDeviceNumaConfig enum + #: :py:obj:`~.cudaDeviceNumaConfig` enum cudaDevAttrNumaConfig = ccudart.cudaDeviceAttr.cudaDevAttrNumaConfig{{endif}} {{if 'cudaDevAttrNumaId' in found_values}} @@ -3412,6 +3426,10 @@ class cudaDeviceAttr(IntEnum): #: NUMA ID of the host node closest to the device. Returns -1 when #: system does not support NUMA. cudaDevAttrHostNumaId = ccudart.cudaDeviceAttr.cudaDevAttrHostNumaId{{endif}} + {{if 'cudaDevAttrD3D12CigSupported' in found_values}} + + #: Device supports CIG with D3D12. 
+ cudaDevAttrD3D12CigSupported = ccudart.cudaDeviceAttr.cudaDevAttrD3D12CigSupported{{endif}} {{if 'cudaDevAttrMax' in found_values}} cudaDevAttrMax = ccudart.cudaDeviceAttr.cudaDevAttrMax{{endif}} {{endif}} @@ -3555,6 +3573,11 @@ class cudaMemAllocationHandleType(IntEnum): #: Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE) cudaMemHandleTypeWin32Kmt = ccudart.cudaMemAllocationHandleType.cudaMemHandleTypeWin32Kmt{{endif}} + {{if 'cudaMemHandleTypeFabric' in found_values}} + + #: Allows a fabric handle to be used for exporting. + #: (cudaMemFabricHandle_t) + cudaMemHandleTypeFabric = ccudart.cudaMemAllocationHandleType.cudaMemHandleTypeFabric{{endif}} {{endif}} {{if 'cudaGraphMemAttributeType' in found_types}} @@ -4030,7 +4053,7 @@ class cudaGraphInstantiateFlags(IntEnum): class cudaDeviceNumaConfig(IntEnum): """ - + CUDA device NUMA config """ {{if 'cudaDeviceNumaConfigNone' in found_values}} @@ -4322,6 +4345,18 @@ class cudaStreamAttrID(IntEnum): #: updatable nodes, the graph must be uploaded before it is launched #: again. cudaLaunchAttributeDeviceUpdatableKernelNode = ccudart.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode{{endif}} + {{if 'cudaLaunchAttributePreferredSharedMemoryCarveout' in found_values}} + + #: Valid for launches. On devices where the L1 cache and shared memory + #: use the same hardware resources, setting + #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a + #: percentage between 0-100 signals sets the shared memory carveout + #: preference in percent of the total shared memory for that kernel + #: launch. This attribute takes precedence over + #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is + #: only a hint, and the driver can choose a different configuration if + #: required for the launch. + cudaLaunchAttributePreferredSharedMemoryCarveout = ccudart.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout{{endif}} {{endif}} {{if 'cudaLaunchAttributeID' in found_types}} @@ -4464,6 +4499,18 @@ class cudaKernelNodeAttrID(IntEnum): #: updatable nodes, the graph must be uploaded before it is launched #: again. cudaLaunchAttributeDeviceUpdatableKernelNode = ccudart.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode{{endif}} + {{if 'cudaLaunchAttributePreferredSharedMemoryCarveout' in found_values}} + + #: Valid for launches. On devices where the L1 cache and shared memory + #: use the same hardware resources, setting + #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a + #: percentage between 0-100 signals sets the shared memory carveout + #: preference in percent of the total shared memory for that kernel + #: launch. This attribute takes precedence over + #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is + #: only a hint, and the driver can choose a different configuration if + #: required for the launch. + cudaLaunchAttributePreferredSharedMemoryCarveout = ccudart.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout{{endif}} {{endif}} {{if 'cudaArray_t' in found_types}} @@ -12434,7 +12481,10 @@ cdef class cudaLaunchAttributeValue: Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent. deviceUpdatableKernelNode : anon_struct23 Value of launch attribute - ::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. + cudaLaunchAttributeDeviceUpdatableKernelNode. + sharedMemCarveout : unsigned int + Value of launch attribute + cudaLaunchAttributePreferredSharedMemoryCarveout. 
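To show how the new `sharedMemCarveout` field is meant to be used, a hedged runtime-API sketch follows. Attaching the attribute to an actual launch is only indicated in a comment, since the launch itself depends on a compiled kernel.

```python
from cuda import cudart

# Build a per-launch attribute that prefers ~50% of the unified L1/shared
# storage as shared memory. This is a hint; the driver may pick another split.
attr = cudart.cudaLaunchAttribute()
attr.id = cudart.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout
attr.val.sharedMemCarveout = 50

# The attribute would then be placed in the attribute list of a launch
# configuration (cudaLaunchConfig_t-style launches) before launching the
# kernel; it takes precedence over the per-function carveout attribute.
```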
Methods ------- @@ -12513,6 +12563,10 @@ cdef class cudaLaunchAttributeValue: str_list += ['deviceUpdatableKernelNode :\n' + '\n'.join([' ' + line for line in str(self.deviceUpdatableKernelNode).splitlines()])] except ValueError: str_list += ['deviceUpdatableKernelNode : '] + try: + str_list += ['sharedMemCarveout : ' + str(self.sharedMemCarveout)] + except ValueError: + str_list += ['sharedMemCarveout : '] return '\n'.join(str_list) else: return '' @@ -12605,6 +12659,12 @@ cdef class cudaLaunchAttributeValue: @deviceUpdatableKernelNode.setter def deviceUpdatableKernelNode(self, deviceUpdatableKernelNode not None : anon_struct23): string.memcpy(&self._ptr[0].deviceUpdatableKernelNode, deviceUpdatableKernelNode.getPtr(), sizeof(self._ptr[0].deviceUpdatableKernelNode)) + @property + def sharedMemCarveout(self): + return self._ptr[0].sharedMemCarveout + @sharedMemCarveout.setter + def sharedMemCarveout(self, unsigned int sharedMemCarveout): + self._ptr[0].sharedMemCarveout = sharedMemCarveout {{endif}} {{if 'struct cudaLaunchAttribute_st' in found_types}} @@ -13581,14 +13641,17 @@ def cudaDeviceReset(): current device in the current process. It is the caller's responsibility to ensure that the resources are not accessed or passed in subsequent API calls and doing so will result in undefined behavior. - These resources include CUDA types such as :py:obj:`~.cudaStream_t`, + These resources include CUDA types :py:obj:`~.cudaStream_t`, :py:obj:`~.cudaEvent_t`, :py:obj:`~.cudaArray_t`, - :py:obj:`~.cudaMipmappedArray_t`, :py:obj:`~.cudaTextureObject_t`, - :py:obj:`~.cudaSurfaceObject_t`, :py:obj:`~.textureReference`, - :py:obj:`~.surfaceReference`, :py:obj:`~.cudaExternalMemory_t`, - :py:obj:`~.cudaExternalSemaphore_t` and - :py:obj:`~.cudaGraphicsResource_t`. Any subsequent API call to this - device will reinitialize the device. + :py:obj:`~.cudaMipmappedArray_t`, :py:obj:`~.cudaPitchedPtr`, + :py:obj:`~.cudaTextureObject_t`, :py:obj:`~.cudaSurfaceObject_t`, + :py:obj:`~.textureReference`, :py:obj:`~.surfaceReference`, + :py:obj:`~.cudaExternalMemory_t`, :py:obj:`~.cudaExternalSemaphore_t` + and :py:obj:`~.cudaGraphicsResource_t`. These resources also include + memory allocations by :py:obj:`~.cudaMalloc`, + :py:obj:`~.cudaMallocHost`, :py:obj:`~.cudaMallocManaged` and + :py:obj:`~.cudaMallocPitch`. Any subsequent API call to this device + will reinitialize the device. Note that this function will reset the device immediately. It is the caller's responsibility to ensure that the device is not being accessed @@ -13603,6 +13666,12 @@ def cudaDeviceReset(): See Also -------- :py:obj:`~.cudaDeviceSynchronize` + + Notes + ----- + :py:obj:`~.cudaDeviceReset()` will not destroy memory allocations by :py:obj:`~.cudaMallocAsync()` and :py:obj:`~.cudaMallocFromPoolAsync()`. These memory allocations need to be destroyed explicitly. + + If a non-primary :py:obj:`~.CUcontext` is current to the thread, :py:obj:`~.cudaDeviceReset()` will destroy only the internal CUDA RT state for that :py:obj:`~.CUcontext`. """ err = ccudart.cudaDeviceReset() return (cudaError_t(err),) @@ -14070,9 +14139,10 @@ def cudaIpcGetEventHandle(event): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode. 
Users can test their device - for IPC functionality by calling :py:obj:`~.cudaDeviceGetAttribute` - with :py:obj:`~.cudaDevAttrIpcEventSupport` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with + :py:obj:`~.cudaDevAttrIpcEventSupport` Parameters ---------- @@ -14124,9 +14194,10 @@ def cudaIpcOpenEventHandle(handle not None : cudaIpcEventHandle_t): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode. Users can test their device - for IPC functionality by calling :py:obj:`~.cudaDeviceGetAttribute` - with :py:obj:`~.cudaDevAttrIpcEventSupport` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with + :py:obj:`~.cudaDevAttrIpcEventSupport` Parameters ---------- @@ -14167,9 +14238,10 @@ def cudaIpcGetMemHandle(devPtr): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode. Users can test their device - for IPC functionality by calling :py:obj:`~.cudaDeviceGetAttribute` - with :py:obj:`~.cudaDevAttrIpcEventSupport` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with + :py:obj:`~.cudaDevAttrIpcEventSupport` Parameters ---------- @@ -14231,9 +14303,10 @@ def cudaIpcOpenMemHandle(handle not None : cudaIpcMemHandle_t, unsigned int flag IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode. Users can test their device - for IPC functionality by calling :py:obj:`~.cudaDeviceGetAttribute` - with :py:obj:`~.cudaDevAttrIpcEventSupport` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with + :py:obj:`~.cudaDevAttrIpcEventSupport` Parameters ---------- @@ -14281,9 +14354,10 @@ def cudaIpcCloseMemHandle(devPtr): IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. IPC functionality on - Windows is restricted to GPUs in TCC mode. Users can test their device - for IPC functionality by calling :py:obj:`~.cudaDeviceGetAttribute` - with :py:obj:`~.cudaDevAttrIpcEventSupport` + Windows is supported for compatibility purposes but not recommended as + it comes with performance cost. Users can test their device for IPC + functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with + :py:obj:`~.cudaDevAttrIpcEventSupport` Parameters ---------- @@ -15357,6 +15431,11 @@ def cudaDeviceGetAttribute(attr not None : cudaDeviceAttr, int device): - :py:obj:`~.cudaDevAttrIpcEventSupport`: 1 if the device supports IPC Events. 
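A short sketch of the capability check recommended in the IPC docstrings above, followed by exporting an inter-process event handle; device 0 is used for illustration.

```python
from cuda import cudart

# Check IPC event support, then export an inter-process event handle.
err, supported = cudart.cudaDeviceGetAttribute(
    cudart.cudaDeviceAttr.cudaDevAttrIpcEventSupport, 0)
if supported:
    err, event = cudart.cudaEventCreateWithFlags(
        cudart.cudaEventInterprocess | cudart.cudaEventDisableTiming)
    err, handle = cudart.cudaIpcGetEventHandle(event)
    # `handle` can be shipped to another process and opened there with
    # cudaIpcOpenEventHandle().
```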
+ - :py:obj:`~.cudaDevAttrNumaConfig`: NUMA configuration of a device: + value is of type :py:obj:`~.cudaDeviceNumaConfig` enum + + - :py:obj:`~.cudaDevAttrNumaId`: NUMA node ID of the GPU memory + Parameters ---------- attr : :py:obj:`~.cudaDeviceAttr` @@ -18698,6 +18777,35 @@ def cudaFuncSetAttribute(func, attr not None : cudaFuncAttribute, int value): a hint, and the driver can choose a different ratio if required to execute the function. + - :py:obj:`~.cudaFuncAttributeRequiredClusterWidth`: The required + cluster width in blocks. The width, height, and depth values must + either all be 0 or all be positive. The validity of the cluster + dimensions is checked at launch time. If the value is set during + compile time, it cannot be set at runtime. Setting it at runtime will + return cudaErrorNotPermitted. + + - :py:obj:`~.cudaFuncAttributeRequiredClusterHeight`: The required + cluster height in blocks. The width, height, and depth values must + either all be 0 or all be positive. The validity of the cluster + dimensions is checked at launch time. If the value is set during + compile time, it cannot be set at runtime. Setting it at runtime will + return cudaErrorNotPermitted. + + - :py:obj:`~.cudaFuncAttributeRequiredClusterDepth`: The required + cluster depth in blocks. The width, height, and depth values must + either all be 0 or all be positive. The validity of the cluster + dimensions is checked at launch time. If the value is set during + compile time, it cannot be set at runtime. Setting it at runtime will + return cudaErrorNotPermitted. + + - :py:obj:`~.cudaFuncAttributeNonPortableClusterSizeAllowed`: Indicates + whether the function can be launched with non-portable cluster size. + 1 is allowed, 0 is disallowed. + + - :py:obj:`~.cudaFuncAttributeClusterSchedulingPolicyPreference`: The + block scheduling policy of a function. The value type is + cudaClusterSchedulingPolicy. + cudaLaunchKernel (C++ API), cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cudaFuncGetAttributes (C API)`, @@ -23379,6 +23487,21 @@ def cudaMemPoolCreate(poolProps : Optional[cudaMemPoolProps]): :py:obj:`~.cudaMemPoolProps.maxSize`. If set to 0, the maximum size of the pool will default to a system dependent value. + Applications can set :py:obj:`~.cudaMemPoolProps.handleTypes` to + :py:obj:`~.cudaMemHandleTypeFabric` in order to create + :py:obj:`~.cudaMemPool_t` suitable for sharing within an IMEX domain. + An IMEX domain is either an OS instance or a group of securely + connected OS instances using the NVIDIA IMEX daemon. An IMEX channel is + a global resource within the IMEX domain that represents a logical + entity that aims to provide fine grained accessibility control for the + participating processes. When exporter and importer CUDA processes have + been granted access to the same IMEX channel, they can securely share + memory. If the allocating process does not have access setup for an + IMEX channel, attempting to export a :py:obj:`~.CUmemoryPool` with + :py:obj:`~.cudaMemHandleTypeFabric` will result in + :py:obj:`~.cudaErrorNotPermitted`. The nvidia-modprobe CLI provides + more information regarding setting up of IMEX channels. + Parameters ---------- poolProps : :py:obj:`~.cudaMemPoolProps` @@ -24335,7 +24458,8 @@ def cudaCreateTextureObject(pResDesc : Optional[cudaResourceDesc], pTexDesc : Op specifies an alternate format for the data described by `pResDesc`, and also describes the subresource region to restrict access to when texturing. 
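A hedged sketch of creating a fabric-handle memory pool as described in the IMEX note above; it assumes access to an IMEX channel has already been set up, otherwise exporting from the pool is expected to fail with `cudaErrorNotPermitted`.

```python
from cuda import cudart

# Pool properties for a device-local pool whose allocations can be exported
# with a fabric handle (sharing within an IMEX domain).
props = cudart.cudaMemPoolProps()
props.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
props.handleTypes = cudart.cudaMemAllocationHandleType.cudaMemHandleTypeFabric
props.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
props.location.id = 0

err, pool = cudart.cudaMemPoolCreate(props)
```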
`pResViewDesc` can only be specified if the type of resource - is a CUDA array or a CUDA mipmapped array. + is a CUDA array or a CUDA mipmapped array not in a block compressed + format. Texture objects are only supported on devices of compute capability 3.0 or higher. Additionally, a texture object is an opaque value, and, as @@ -30260,6 +30384,10 @@ def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags): consists of function pointer typedefs for driver APIs defined in cuda.h. + The API will return :py:obj:`~.cudaSuccess` and set the returned + `funcPtr` if the requested driver function is valid and supported on + the platform. + The API will return :py:obj:`~.cudaSuccess` and set the returned `funcPtr` to NULL if the requested driver function is not supported on the platform, no ABI compatible driver function exists for the CUDA @@ -30330,6 +30458,109 @@ def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags): return (cudaError_t(err), funcPtr, cudaDriverEntryPointQueryResult(driverStatus)) {{endif}} +{{if 'cudaGetDriverEntryPointByVersion' in found_functions}} + +@cython.embedsignature(True) +def cudaGetDriverEntryPointByVersion(char* symbol, unsigned int cudaVersion, unsigned long long flags): + """ Returns the requested driver API function pointer by CUDA version. + + Returns in `**funcPtr` the address of the CUDA driver function for the + requested flags and CUDA driver version. + + The CUDA version is specified as (1000 * major + 10 * minor), so CUDA + 11.2 should be specified as 11020. For a requested driver symbol, if + the specified CUDA version is greater than or equal to the CUDA version + in which the driver symbol was introduced, this API will return the + function pointer to the corresponding versioned function. + + The pointer returned by the API should be cast to a function pointer + matching the requested driver function's definition in the API header + file. The function pointer typedef can be picked up from the + corresponding typedefs header file. For example, cudaTypedefs.h + consists of function pointer typedefs for driver APIs defined in + cuda.h. + + For the case where the CUDA version requested is greater than the CUDA + Toolkit installed, there may not be an appropriate function pointer + typedef in the corresponding header file and may need a custom typedef + to match the driver function signature returned. This can be done by + getting the typedefs from a later toolkit or creating appropriately + matching custom function typedefs. + + The API will return :py:obj:`~.cudaSuccess` and set the returned + `funcPtr` if the requested driver function is valid and supported on + the platform. + + The API will return :py:obj:`~.cudaSuccess` and set the returned + `funcPtr` to NULL if the requested driver function is not supported on + the platform, no ABI compatible driver function exists for the + requested version or if the driver symbol is invalid. 
+ + It will also set the optional `driverStatus` to one of the values in + :py:obj:`~.cudaDriverEntryPointQueryResult` with the following + meanings: + + - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was + succesfully found based on input arguments and `pfn` is valid + + - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol + was not found + + - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested + symbol was found but is not supported by the specified version + `cudaVersion` + + The requested flags can be: + + - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is + equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the + code is compiled with --default-stream per-thread compilation flag or + the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined; + :py:obj:`~.cudaEnableLegacyStream` otherwise. + + - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for + all driver symbols that match the requested driver symbol name except + the corresponding per-thread versions. + + - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the + search for all driver symbols that match the requested driver symbol + name including the per-thread versions. If a per-thread version is + not found, the API will return the legacy version of the driver + function. + + Parameters + ---------- + symbol : bytes + The base name of the driver API function to look for. As an + example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol` + would be cuMemAlloc. + cudaVersion : unsigned int + The CUDA version to look for the requested driver symbol + flags : unsigned long long + Flags to specify search options. + + Returns + ------- + cudaError_t + :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported` + funcPtr : Any + Location to return the function pointer to the requested driver + function + driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult` + Optional location to store the status of finding the symbol from + the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for + possible values. 
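For completeness, a small sketch of the new versioned lookup; the symbol and version below are illustrative.

```python
from cuda import cudart

# Look up the cuMemAlloc driver entry point as exposed by CUDA 12.5 (12050),
# using the default stream semantics of the current compilation mode.
err, func_ptr, status = cudart.cudaGetDriverEntryPointByVersion(
    b"cuMemAlloc", 12050, cudart.cudaEnableDefault)
if status == cudart.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess:
    print("cuMemAlloc entry point:", hex(func_ptr))
```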
+ + See Also + -------- + :py:obj:`~.cuGetProcAddress` + """ + cdef void_ptr funcPtr = 0 + cdef ccudart.cudaDriverEntryPointQueryResult driverStatus + err = ccudart.cudaGetDriverEntryPointByVersion(symbol, &funcPtr, cudaVersion, flags, &driverStatus) + return (cudaError_t(err), funcPtr, cudaDriverEntryPointQueryResult(driverStatus)) +{{endif}} + {{if 'cudaGetExportTable' in found_functions}} @cython.embedsignature(True) diff --git a/cuda/tests/test_interoperability.py b/cuda/tests/test_interoperability.py index 69b83011..aab3b9a9 100644 --- a/cuda/tests/test_interoperability.py +++ b/cuda/tests/test_interoperability.py @@ -191,3 +191,43 @@ def test_interop_graphExec(): assert(err_rt == cudart.cudaError_t.cudaSuccess) err_dr, = cuda.cuCtxDestroy(ctx) assert(err_dr == cuda.CUresult.CUDA_SUCCESS) + +def test_interop_deviceptr(): + # Init CUDA + err, = cuda.cuInit(0) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Get device + err, device = cuda.cuDeviceGet(0) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Construct context + err, ctx = cuda.cuCtxCreate(0, device) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Allocate dev memory + size = 1024 * np.uint8().itemsize + err_dr, dptr = cuda.cuMemAlloc(size) + assert(err_dr == cuda.CUresult.CUDA_SUCCESS) + + # Allocate host memory + h1 = np.full(size, 1).astype(np.uint8) + h2 = np.full(size, 2).astype(np.uint8) + assert(np.array_equal(h1, h2) is False) + + # Initialize device memory + err_rt, = cudart.cudaMemset(dptr, 1, size) + assert(err_rt == cudart.cudaError_t.cudaSuccess) + + # D to h2 + err_rt, = cudart.cudaMemcpy(h2, dptr, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost) + assert(err_rt == cudart.cudaError_t.cudaSuccess) + + # Validate h1 == h2 + assert(np.array_equal(h1, h2)) + + # Cleanup + err_dr, = cuda.cuMemFree(dptr) + assert(err_dr == cuda.CUresult.CUDA_SUCCESS) + err_dr, = cuda.cuCtxDestroy(ctx) + assert(err_dr == cuda.CUresult.CUDA_SUCCESS) diff --git a/docs/_sources/install.md.txt b/docs/_sources/install.md.txt index a258902c..d3f7584d 100644 --- a/docs/_sources/install.md.txt +++ b/docs/_sources/install.md.txt @@ -6,7 +6,7 @@ CUDA Python is supported on all platforms that CUDA is supported. Specific dependencies are as follows: * Driver: Linux (450.80.02 or later) Windows (456.38 or later) -* CUDA Toolkit 12.0 to 12.4 +* CUDA Toolkit 12.0 to 12.5 ```{note} Only the NVRTC redistributable component is required from the CUDA Toolkit. [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) Installation Guides can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PYPI, Conda or Local Installer. ``` diff --git a/docs/_sources/module/cuda.rst.txt b/docs/_sources/module/cuda.rst.txt index 7dbf3286..30490126 100644 --- a/docs/_sources/module/cuda.rst.txt +++ b/docs/_sources/module/cuda.rst.txt @@ -33,6 +33,8 @@ Data types used by CUDA driver .. autoclass:: cuda.cuda.CUlaunchConfig_st .. autoclass:: cuda.cuda.CUexecAffinitySmCount_st .. autoclass:: cuda.cuda.CUexecAffinityParam_st +.. autoclass:: cuda.cuda.CUctxCigParam_st +.. autoclass:: cuda.cuda.CUctxCreateParams_st .. autoclass:: cuda.cuda.CUlibraryHostUniversalFunctionAndDataTable_st .. autoclass:: cuda.cuda.CUDA_MEMCPY2D_st .. autoclass:: cuda.cuda.CUDA_MEMCPY3D_st @@ -641,6 +643,87 @@ Data types used by CUDA driver 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding + + .. 
autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_P010 + + + 10-bit YUV planar format, with 4:2:0 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_P016 + + + 16-bit YUV planar format, with 4:2:0 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_NV16 + + + 8-bit YUV planar format, with 4:2:2 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_P210 + + + 10-bit YUV planar format, with 4:2:2 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_P216 + + + 16-bit YUV planar format, with 4:2:2 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_YUY2 + + + 2 channel, 8-bit YUV packed planar format, with 4:2:2 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_Y210 + + + 2 channel, 10-bit YUV packed planar format, with 4:2:2 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_Y216 + + + 2 channel, 16-bit YUV packed planar format, with 4:2:2 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_AYUV + + + 4 channel, 8-bit YUV packed planar format, with 4:4:4 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_Y410 + + + 10-bit YUV packed planar format, with 4:4:4 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_Y416 + + + 4 channel, 12-bit YUV packed planar format, with 4:4:4 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_Y444_PLANAR8 + + + 3 channel 8-bit YUV planar format, with 4:4:4 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_Y444_PLANAR10 + + + 3 channel 10-bit YUV planar format, with 4:4:4 sampling + + + .. autoattribute:: cuda.cuda.CUarray_format.CU_AD_FORMAT_MAX + .. autoclass:: cuda.cuda.CUaddress_mode .. autoattribute:: cuda.cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_WRAP @@ -1494,9 +1577,15 @@ Data types used by CUDA driver .. autoattribute:: cuda.cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG + NUMA configuration of a device: value is of type :py:obj:`~.CUdeviceNumaConfig` enum + + .. autoattribute:: cuda.cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID + NUMA node ID of the GPU memory + + .. autoattribute:: cuda.cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED @@ -1515,6 +1604,12 @@ Data types used by CUDA driver NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA. + .. autoattribute:: cuda.cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED + + + Device supports CIG with D3D12. + + .. autoattribute:: cuda.cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX .. autoclass:: cuda.cuda.CUpointer_attribute @@ -2614,6 +2709,24 @@ Data types used by CUDA driver A size in bytes for L2 persisting lines cache size + .. autoattribute:: cuda.cuda.CUlimit.CU_LIMIT_SHMEM_SIZE + + + A maximum size in bytes of shared memory available to CUDA kernels on a CIG context. Can only be queried, cannot be set + + + .. autoattribute:: cuda.cuda.CUlimit.CU_LIMIT_CIG_ENABLED + + + A non-zero value indicates this CUDA context is a CIG-enabled context. Can only be queried, cannot be set + + + .. autoattribute:: cuda.cuda.CUlimit.CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED + + + When set to a non-zero value, CUDA will fail to launch a kernel on a CIG context, instead of using the fallback path, if the kernel uses more shared memory than available + + .. autoattribute:: cuda.cuda.CUlimit.CU_LIMIT_MAX .. 
autoclass:: cuda.cuda.CUresourcetype @@ -2968,6 +3081,12 @@ Data types used by CUDA driver If a graph contains device-updatable nodes and updates those nodes from the device from within the graph, the graph must be uploaded with :py:obj:`~.cuGraphUpload` before it is launched. For such a graph, if host-side executable graph updates are made to the device-updatable nodes, the graph must be uploaded before it is launched again. + + .. autoattribute:: cuda.cuda.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT + + + Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage between 0-100 signals the CUDA driver to set the shared memory carveout preference, in percent of the total shared memory for that kernel launch. This attribute takes precedence over :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This is only a hint, and the CUDA driver can choose a different configuration if required for the launch. + .. autoclass:: cuda.cuda.CUstreamCaptureStatus .. autoattribute:: cuda.cuda.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_NONE @@ -3045,6 +3164,10 @@ Data types used by CUDA driver .. autoattribute:: cuda.cuda.CUexecAffinityType.CU_EXEC_AFFINITY_TYPE_MAX +.. autoclass:: cuda.cuda.CUcigDataType + + .. autoattribute:: cuda.cuda.CUcigDataType.CIG_DATA_TYPE_D3D12_COMMAND_QUEUE + .. autoclass:: cuda.cuda.CUlibraryOption .. autoattribute:: cuda.cuda.CUlibraryOption.CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE @@ -5333,6 +5456,7 @@ Data types used by CUDA driver .. autoclass:: cuda.cuda.CUgraphConditionalHandle .. autoclass:: cuda.cuda.CUgraphDeviceNode .. autoclass:: cuda.cuda.CUasyncCallbackHandle +.. autoclass:: cuda.cuda.CUgreenCtx .. autoclass:: cuda.cuda.CUuuid .. autoclass:: cuda.cuda.CUmemFabricHandle_v1 .. autoclass:: cuda.cuda.CUmemFabricHandle @@ -5380,6 +5504,8 @@ Data types used by CUDA driver .. autoclass:: cuda.cuda.CUexecAffinitySmCount .. autoclass:: cuda.cuda.CUexecAffinityParam_v1 .. autoclass:: cuda.cuda.CUexecAffinityParam +.. autoclass:: cuda.cuda.CUctxCigParam +.. autoclass:: cuda.cuda.CUctxCreateParams .. autoclass:: cuda.cuda.CUlibraryHostUniversalFunctionAndDataTable .. autoclass:: cuda.cuda.CUstreamCallback .. autoclass:: cuda.cuda.CUoccupancyB2DSize @@ -5518,6 +5644,7 @@ Data types used by CUDA driver .. autoattribute:: cuda.cuda.CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP .. autoattribute:: cuda.cuda.CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN .. autoattribute:: cuda.cuda.CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE +.. autoattribute:: cuda.cuda.CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT .. autoattribute:: cuda.cuda.CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW .. autoattribute:: cuda.cuda.CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY .. autoattribute:: cuda.cuda.CU_STREAM_ATTRIBUTE_PRIORITY @@ -5627,6 +5754,10 @@ Data types used by CUDA driver This flag if set indicates that the CUDA array or CUDA mipmapped array will allow deferred memory mapping +.. autoattribute:: cuda.cuda.CUDA_ARRAY3D_VIDEO_ENCODE_DECODE + + This flag indicates that the CUDA array will be used for hardware accelerated video encode/decode operations. + .. autoattribute:: cuda.cuda.CU_TRSA_OVERRIDE_FORMAT Override the texref format with a format inferred from the array. Flag for :py:obj:`~.cuTexRefSetArray()` @@ -5765,6 +5896,7 @@ Please note that some functions are described in Primary Context Management sect .. 
autofunction:: cuda.cuda.cuCtxCreate .. autofunction:: cuda.cuda.cuCtxCreate_v3 +.. autofunction:: cuda.cuda.cuCtxCreate_v4 .. autofunction:: cuda.cuda.cuCtxDestroy .. autofunction:: cuda.cuda.cuCtxPushCurrent .. autofunction:: cuda.cuda.cuCtxPopCurrent @@ -5783,6 +5915,8 @@ Please note that some functions are described in Primary Context Management sect .. autofunction:: cuda.cuda.cuCtxGetStreamPriorityRange .. autofunction:: cuda.cuda.cuCtxResetPersistingL2Cache .. autofunction:: cuda.cuda.cuCtxGetExecAffinity +.. autofunction:: cuda.cuda.cuCtxRecordEvent +.. autofunction:: cuda.cuda.cuCtxWaitEvent Module Management ----------------- @@ -5831,6 +5965,7 @@ This section describes the library management functions of the low-level CUDA dr .. autofunction:: cuda.cuda.cuLibraryEnumerateKernels .. autofunction:: cuda.cuda.cuLibraryGetModule .. autofunction:: cuda.cuda.cuKernelGetFunction +.. autofunction:: cuda.cuda.cuKernelGetLibrary .. autofunction:: cuda.cuda.cuLibraryGetGlobal .. autofunction:: cuda.cuda.cuLibraryGetManaged .. autofunction:: cuda.cuda.cuLibraryGetUnifiedFunction @@ -6108,6 +6243,7 @@ This section describes the stream management functions of the low-level CUDA dri .. autofunction:: cuda.cuda.cuStreamGetFlags .. autofunction:: cuda.cuda.cuStreamGetId .. autofunction:: cuda.cuda.cuStreamGetCtx +.. autofunction:: cuda.cuda.cuStreamGetCtx_v2 .. autofunction:: cuda.cuda.cuStreamWaitEvent .. autofunction:: cuda.cuda.cuStreamAddCallback .. autofunction:: cuda.cuda.cuStreamBeginCapture @@ -6409,8 +6545,33 @@ This section describes the coredump attribute control functions of the low-level .. autoattribute:: cuda.cuda.CUcoredumpSettings.CU_COREDUMP_PIPE + .. autoattribute:: cuda.cuda.CUcoredumpSettings.CU_COREDUMP_GENERATION_FLAGS + + .. autoattribute:: cuda.cuda.CUcoredumpSettings.CU_COREDUMP_MAX +.. autoclass:: cuda.cuda.CUCoredumpGenerationFlags + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_DEFAULT_FLAGS + + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES + + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_GLOBAL_MEMORY + + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_SHARED_MEMORY + + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_LOCAL_MEMORY + + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_ABORT + + + .. autoattribute:: cuda.cuda.CUCoredumpGenerationFlags.CU_COREDUMP_LIGHTWEIGHT_FLAGS + .. autofunction:: cuda.cuda.cuCoredumpGetAttribute .. autofunction:: cuda.cuda.cuCoredumpGetAttributeGlobal .. autofunction:: cuda.cuda.cuCoredumpSetAttribute @@ -6526,6 +6687,13 @@ Even if the green contexts have disjoint SM partitions, it is not guaranteed tha Required. Creates a default stream to use inside the green context +.. autoclass:: cuda.cuda.CUdevSmResourceSplit_flags + + .. autoattribute:: cuda.cuda.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING + + + .. autoattribute:: cuda.cuda.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE + .. autoclass:: cuda.cuda.CUdevResourceType .. autoattribute:: cuda.cuda.CUdevResourceType.CU_DEV_RESOURCE_TYPE_INVALID @@ -6536,7 +6704,6 @@ Even if the green contexts have disjoint SM partitions, it is not guaranteed tha Streaming multiprocessors related information -.. autoclass:: cuda.cuda.CUgreenCtx .. autoclass:: cuda.cuda.CUdevResourceDesc .. autoclass:: cuda.cuda.CUdevSmResource .. 
autofunction:: cuda.cuda._CONCAT_OUTER @@ -6551,6 +6718,7 @@ Even if the green contexts have disjoint SM partitions, it is not guaranteed tha .. autofunction:: cuda.cuda.cuGreenCtxRecordEvent .. autofunction:: cuda.cuda.cuGreenCtxWaitEvent .. autofunction:: cuda.cuda.cuStreamGetGreenCtx +.. autofunction:: cuda.cuda.cuGreenCtxStreamCreate .. autoattribute:: cuda.cuda.RESOURCE_ABI_VERSION .. autoattribute:: cuda.cuda.RESOURCE_ABI_EXTERNAL_BYTES .. autoattribute:: cuda.cuda._CONCAT_INNER diff --git a/docs/_sources/module/cudart.rst.txt b/docs/_sources/module/cudart.rst.txt index dfdcabc4..9e53a56c 100644 --- a/docs/_sources/module/cudart.rst.txt +++ b/docs/_sources/module/cudart.rst.txt @@ -574,6 +574,7 @@ Driver Entry Point Access This section describes the driver entry point access functions of CUDA runtime application programming interface. .. autofunction:: cuda.cudart.cudaGetDriverEntryPoint +.. autofunction:: cuda.cudart.cudaGetDriverEntryPointByVersion C++ API Routines ---------------- @@ -3959,7 +3960,7 @@ Data types used by CUDA Runtime .. autoattribute:: cuda.cudart.cudaDeviceAttr.cudaDevAttrNumaConfig - NUMA configuration of a device: value is of type cudaDeviceNumaConfig enum + NUMA configuration of a device: value is of type :py:obj:`~.cudaDeviceNumaConfig` enum .. autoattribute:: cuda.cudart.cudaDeviceAttr.cudaDevAttrNumaId @@ -3983,6 +3984,12 @@ Data types used by CUDA Runtime NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA. + .. autoattribute:: cuda.cudart.cudaDeviceAttr.cudaDevAttrD3D12CigSupported + + + Device supports CIG with D3D12. + + .. autoattribute:: cuda.cudart.cudaDeviceAttr.cudaDevAttrMax .. autoclass:: cuda.cudart.cudaMemPoolAttr @@ -4119,6 +4126,12 @@ Data types used by CUDA Runtime Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE) + + .. autoattribute:: cuda.cudart.cudaMemAllocationHandleType.cudaMemHandleTypeFabric + + + Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t) + .. autoclass:: cuda.cudart.cudaGraphMemAttributeType .. autoattribute:: cuda.cudart.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent @@ -4796,6 +4809,12 @@ Data types used by CUDA Runtime If a graph contains device-updatable nodes and updates those nodes from the device from within the graph, the graph must be uploaded with :py:obj:`~.cuGraphUpload` before it is launched. For such a graph, if host-side executable graph updates are made to the device-updatable nodes, the graph must be uploaded before it is launched again. + + .. autoattribute:: cuda.cudart.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout + + + Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a percentage between 0-100 signals sets the shared memory carveout preference in percent of the total shared memory for that kernel launch. This attribute takes precedence over :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is only a hint, and the driver can choose a different configuration if required for the launch. + .. autoclass:: cuda.cudart.cudaDeviceNumaConfig .. autoattribute:: cuda.cudart.cudaDeviceNumaConfig.cudaDeviceNumaConfigNone @@ -5218,6 +5237,7 @@ Data types used by CUDA Runtime .. autoattribute:: cuda.cudart.cudaKernelNodeAttributeClusterSchedulingPolicyPreference .. autoattribute:: cuda.cudart.cudaKernelNodeAttributeMemSyncDomainMap .. 
autoattribute:: cuda.cudart.cudaKernelNodeAttributeMemSyncDomain +.. autoattribute:: cuda.cudart.cudaKernelNodeAttributePreferredSharedMemoryCarveout .. autoattribute:: cuda.cudart.cudaKernelNodeAttributeDeviceUpdatableKernelNode .. autoattribute:: cuda.cudart.cudaKernelNodeAttrValue .. autoattribute:: cuda.cudart.cudaSurfaceType1D diff --git a/docs/_sources/release.md.txt b/docs/_sources/release.md.txt index a39f1854..d3f2e8da 100644 --- a/docs/_sources/release.md.txt +++ b/docs/_sources/release.md.txt @@ -5,6 +5,7 @@ maxdepth: 3 --- + 12.5.0 12.4.0 12.3.0 12.2.1 diff --git a/docs/_sources/release/12.2.1-notes.md.txt b/docs/_sources/release/12.2.1-notes.md.txt index 7525d624..41704a56 100644 --- a/docs/_sources/release/12.2.1-notes.md.txt +++ b/docs/_sources/release/12.2.1-notes.md.txt @@ -1,6 +1,6 @@ # CUDA Python 12.2.1 Release notes -Released on January ??, 2024 +Released on January 8, 2024 ## Hightlights - Compatibility with Cython 3 diff --git a/docs/_sources/release/12.4.0-notes.md.txt b/docs/_sources/release/12.4.0-notes.md.txt index 8af49df5..6daedb20 100644 --- a/docs/_sources/release/12.4.0-notes.md.txt +++ b/docs/_sources/release/12.4.0-notes.md.txt @@ -1,6 +1,6 @@ # CUDA Python 12.4.0 Release notes -Released on January ??, 2024 +Released on March 5, 2024 ## Hightlights - Rebase to CUDA Toolkit 12.4 diff --git a/docs/_sources/release/12.5.0-notes.md.txt b/docs/_sources/release/12.5.0-notes.md.txt new file mode 100644 index 00000000..701f0320 --- /dev/null +++ b/docs/_sources/release/12.5.0-notes.md.txt @@ -0,0 +1,34 @@ +# CUDA Python 12.5.0 Release notes + +Released on May 21, 2024 + +## Hightlights +- Rebase to CUDA Toolkit 12.5 +- Resolve [Issue #58](https://github.com/NVIDIA/cuda-python/issues/58): Interop between CUdeviceptr and Runtime + +## Limitations + +### CUDA Functions Not Supported in this Release + +- Symbol APIs + - cudaGraphExecMemcpyNodeSetParamsFromSymbol + - cudaGraphExecMemcpyNodeSetParamsToSymbol + - cudaGraphAddMemcpyNodeToSymbol + - cudaGraphAddMemcpyNodeFromSymbol + - cudaGraphMemcpyNodeSetParamsToSymbol + - cudaGraphMemcpyNodeSetParamsFromSymbol + - cudaMemcpyToSymbol + - cudaMemcpyFromSymbol + - cudaMemcpyToSymbolAsync + - cudaMemcpyFromSymbolAsync + - cudaGetSymbolAddress + - cudaGetSymbolSize + - cudaGetFuncBySymbol +- Launch Options + - cudaLaunchKernel + - cudaLaunchCooperativeKernel + - cudaLaunchCooperativeKernelMultiDevice +- cudaSetValidDevices +- cudaVDPAUSetVDPAUDevice +- cudaFuncGetName +- cudaFuncGetParamInfo diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index 29619378..4fb50319 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '12.4.0', + VERSION: '12.5.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/api.html b/docs/api.html index d79c8f65..3a962ce5 100644 --- a/docs/api.html +++ b/docs/api.html @@ -7,7 +7,7 @@ - CUDA Python API Reference - CUDA Python 12.4.0 documentation + CUDA Python API Reference - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
@@ -152,7 +152,7 @@
- CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation
@@ -152,7 +152,7 @@
- CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
      +
    • 12.5.0
    • 12.4.0
    • 12.3.0
    • 12.2.1
    • diff --git a/docs/contribute.html b/docs/contribute.html index 843cb8ad..810ab6dd 100644 --- a/docs/contribute.html +++ b/docs/contribute.html @@ -7,7 +7,7 @@ - Contributing - CUDA Python 12.4.0 documentation + Contributing - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
  • @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
      +
    • 12.5.0
    • 12.4.0
    • 12.3.0
    • 12.2.1
    • diff --git a/docs/genindex.html b/docs/genindex.html index 0bf63be9..b533d5df 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -5,7 +5,7 @@ - Index - CUDA Python 12.4.0 documentation + Index - CUDA Python 12.5.0 documentation @@ -123,7 +123,7 @@
  • @@ -150,7 +150,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -166,6 +166,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes + -
  • - - + +
  • numExecAffinityParams (cuda.cuda.CUctxCreateParams attribute) + +
  • numExtSems (cuda.cuda.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS attribute) @@ -8782,10 +8909,10 @@

    N

  • (cuda.cudart.cudaExternalMemoryMipmappedArrayDesc attribute)
  • -
  • numRegs (cuda.cudart.cudaFuncAttributes attribute) -
  • + -
    • srcHost (cuda.cuda.CUDA_MEMCPY2D attribute)
        diff --git a/docs/index.html b/docs/index.html index f72033a8..3687f127 100644 --- a/docs/index.html +++ b/docs/index.html @@ -7,7 +7,7 @@ - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
  • @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
      +
    • 12.5.0
    • 12.4.0
    • 12.3.0
    • 12.2.1
    • @@ -234,7 +235,7 @@

      Runtime Requirements
      • Driver: Linux (450.80.02 or later) Windows (456.38 or later)

      • -
      • CUDA Toolkit 12.0 to 12.4

      • +
      • CUDA Toolkit 12.0 to 12.5

      Note

      diff --git a/docs/module/cuda.html b/docs/module/cuda.html index e0713cdf..18527b58 100644 --- a/docs/module/cuda.html +++ b/docs/module/cuda.html @@ -7,7 +7,7 @@ - cuda - CUDA Python 12.4.0 documentation + cuda - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
      @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • @@ -1794,6 +1795,18 @@

        Data types used by CUDA driver +
        +sharedMemCarveout#
        +

        Value of launch attribute +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.

        +
        +
        Type:
        +

        unsigned int

        +
        +
        +
        +
        getPtr()#
        @@ -2013,6 +2026,81 @@

        Data types used by CUDA driver +
        +class cuda.cuda.CUctxCigParam_st(void_ptr _ptr=0)#
        +

        CIG Context Create Params

        +
        +
        +sharedDataType#
        +
        +
        Type:
        +

        CUcigDataType

        +
        +
        +
        + +
        +
        +sharedData#
        +
        +
        Type:
        +

        Any

        +
        +
        +
        + +
        +
        +getPtr()#
        +

        Get memory address of class instance

        +
        + +

        + +
        +
        +class cuda.cuda.CUctxCreateParams_st(void_ptr _ptr=0)#
        +

        Params for creating CUDA context. Exactly one of execAffinityParams +and cigParams must be non-NULL.

        +
        +
        +execAffinityParams#
        +
        +
        Type:
        +

        CUexecAffinityParam

        +
        +
        +
        + +
        +
        +numExecAffinityParams#
        +
        +
        Type:
        +

        int

        +
        +
        +
        + +
        +
        +cigParams#
        +
        +
        Type:
        +

        CUctxCigParam

        +
        +
        +
        + +
        +
        +getPtr()#
        +

        Get memory address of class instance

        +
        + +
        +
        class cuda.cuda.CUlibraryHostUniversalFunctionAndDataTable_st(void_ptr _ptr=0)#
        @@ -5796,6 +5884,89 @@

        Data types used by CUDA driver +
        +CU_AD_FORMAT_P010 = 159#
        +

        10-bit YUV planar format, with 4:2:0 sampling

        +

        + +
        +
        +CU_AD_FORMAT_P016 = 161#
        +

        16-bit YUV planar format, with 4:2:0 sampling

        +
        + +
        +
        +CU_AD_FORMAT_NV16 = 162#
        +

        8-bit YUV planar format, with 4:2:2 sampling

        +
        + +
        +
        +CU_AD_FORMAT_P210 = 163#
        +

        10-bit YUV planar format, with 4:2:2 sampling

        +
        + +
        +
        +CU_AD_FORMAT_P216 = 164#
        +

        16-bit YUV planar format, with 4:2:2 sampling

        +
        + +
        +
        +CU_AD_FORMAT_YUY2 = 165#
        +

        2 channel, 8-bit YUV packed planar format, with 4:2:2 sampling

        +
        + +
        +
        +CU_AD_FORMAT_Y210 = 166#
        +

        2 channel, 10-bit YUV packed planar format, with 4:2:2 sampling

        +
        + +
        +
        +CU_AD_FORMAT_Y216 = 167#
        +

        2 channel, 16-bit YUV packed planar format, with 4:2:2 sampling

        +
        + +
        +
        +CU_AD_FORMAT_AYUV = 168#
        +

        4 channel, 8-bit YUV packed planar format, with 4:4:4 sampling

        +
        + +
        +
        +CU_AD_FORMAT_Y410 = 169#
        +

        10-bit YUV packed planar format, with 4:4:4 sampling

        +
        + +
        +
        +CU_AD_FORMAT_Y416 = 177#
        +

        4 channel, 12-bit YUV packed planar format, with 4:4:4 sampling

        +
        + +
        +
        +CU_AD_FORMAT_Y444_PLANAR8 = 178#
        +

        3 channel 8-bit YUV planar format, with 4:4:4 sampling

        +
        + +
        +
        +CU_AD_FORMAT_Y444_PLANAR10 = 179#
        +

        3 channel 10-bit YUV planar format, with 4:4:4 sampling

        +
        + +
        +
        +CU_AD_FORMAT_MAX = 2147483647#
        +
        +
        @@ -6663,12 +6834,14 @@

        Data types used by CUDA driver
        CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = 130#
        -

        +

        NUMA configuration of a device: value is of type CUdeviceNumaConfig enum

        +
        CU_DEVICE_ATTRIBUTE_NUMA_ID = 131#
        -
        +

        NUMA node ID of the GPU memory

        +
        @@ -6688,9 +6861,15 @@

        Data types used by CUDA driver +
        +CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135#
        +

        Device supports CIG with D3D12.

        +

        +
        -CU_DEVICE_ATTRIBUTE_MAX = 135#
        +CU_DEVICE_ATTRIBUTE_MAX = 136#
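As a minimal sketch (not part of the patch itself), the new attribute can be queried from Python with the 12.5 bindings documented above; error handling is abbreviated:

```python
from cuda import cuda

err, = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)
# 1 if the device/driver can create contexts in CIG (CUDA in Graphics) mode with D3D12
err, cig_supported = cuda.cuDeviceGetAttribute(
    cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED, dev)
print("D3D12 CIG supported:", bool(cig_supported))
```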
        @@ -7793,9 +7972,27 @@

        Data types used by CUDA driver +
        +CU_LIMIT_SHMEM_SIZE = 7#
        +

        A maximum size in bytes of shared memory available to CUDA kernels on a CIG context. Can only be queried, cannot be set

        +
        + +
        +
        +CU_LIMIT_CIG_ENABLED = 8#
        +

        A non-zero value indicates this CUDA context is a CIG-enabled context. Can only be queried, cannot be set

        +
        + +
        +
        +CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 9#
        +

        When set to a non-zero value, CUDA will fail to launch a kernel on a CIG context, instead of using the fallback path, if the kernel uses more shared memory than available

        +
        +
        -CU_LIMIT_MAX = 7#
        +CU_LIMIT_MAX = 10#
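For illustration only, a hedged sketch of querying the new CIG-related limits with cuCtxGetLimit; they are query-only, and the values are only meaningful when the current context is a CIG context:

```python
from cuda import cuda

err, = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)
err, ctx = cuda.cuCtxCreate(0, dev)   # any current context; CIG values apply to CIG contexts

err, shmem = cuda.cuCtxGetLimit(cuda.CUlimit.CU_LIMIT_SHMEM_SIZE)
err, cig_on = cuda.cuCtxGetLimit(cuda.CUlimit.CU_LIMIT_CIG_ENABLED)
print("max kernel shared memory:", shmem, "CIG-enabled context:", bool(cig_on))

err, = cuda.cuCtxDestroy(ctx)
```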
        @@ -8214,6 +8411,12 @@

        Data types used by CUDA driver +
        +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14#
        +

        Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting sharedMemCarveout to a percentage between 0-100 signals the CUDA driver to set the shared memory carveout preference, in percent of the total shared memory for that kernel launch. This attribute takes precedence over CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT. This is only a hint, and the CUDA driver can choose a different configuration if required for the launch.
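A short sketch of how this attribute could be set up from Python, assuming the 12.5 bindings above; the kernel handle, grid/block dimensions and stream are omitted here:

```python
from cuda import cuda

# Hint a 50% shared-memory carveout for a single launch (only a hint, per the text above)
attr = cuda.CUlaunchAttribute()
attr.id = cuda.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
attr.value.sharedMemCarveout = 50

# The attribute is then placed in CUlaunchConfig.attrs (with numAttrs = 1)
# and passed to cuLaunchKernelEx together with the kernel and its parameters.
```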

        +
        +
        @@ -8331,6 +8534,16 @@

        Data types used by CUDA driver +
        +class cuda.cuda.CUcigDataType(value)#
        +
        +
        +CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = 1#
        +
        + +

        +
        class cuda.cuda.CUlibraryOption(value)#
        @@ -10121,7 +10334,8 @@

        Data types used by CUDA driver
        class cuda.cuda.CUdeviceNumaConfig(value)#
        -
        +

        CUDA device NUMA configuration

        +
        CU_DEVICE_NUMA_CONFIG_NONE = 0#

        The GPU is not a NUMA node

        @@ -10930,7 +11144,7 @@

        Data types used by CUDA driver
        class cuda.cuda.CUcontext(*args, **kwargs)#
        -

        CUDA context

        +

        A regular context handle

        getPtr()#
        @@ -11238,6 +11452,18 @@

        Data types used by CUDA driver +
        +class cuda.cuda.CUgreenCtx(*args, **kwargs)#
        +

        A green context handle. This handle can be used safely from only one CPU thread at a time. Created via cuGreenCtxCreate

        +
        +
        +getPtr()#
        +

        Get memory address of class instance

        +
        + +

        +
        class cuda.cuda.CUuuid#
        @@ -13486,6 +13712,18 @@

        Data types used by CUDA driver +
        +sharedMemCarveout#
        +

        Value of launch attribute +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.

        +
        +
        Type:
        +

        unsigned int

        +
        +
        +

        +
        getPtr()#
        @@ -13820,6 +14058,18 @@

        Data types used by CUDA driver +
        +sharedMemCarveout#
        +

        Value of launch attribute +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.

        +
        +
        Type:
        +

        unsigned int

        +
        +
        +

        +
        getPtr()#
        @@ -13991,6 +14241,18 @@

        Data types used by CUDA driver +
        +sharedMemCarveout#
        +

        Value of launch attribute +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.

        +
        +
        Type:
        +

        unsigned int

        +
        +
        +

        +
        getPtr()#
        @@ -14169,6 +14431,18 @@

        Data types used by CUDA driver +
        +sharedMemCarveout#
        +

        Value of launch attribute +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.

        +
        +
        Type:
        +

        unsigned int

        +
        +
        +

        +
        getPtr()#
        @@ -14340,6 +14614,18 @@

        Data types used by CUDA driver +
        +sharedMemCarveout#
        +

        Value of launch attribute +CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.

        +
        +
        Type:
        +

        unsigned int

        +
        +
        +

        +
        getPtr()#
        @@ -14458,6 +14744,81 @@

        Data types used by CUDA driver +
        +class cuda.cuda.CUctxCigParam#
        +

        CIG Context Create Params

        +
        +
        +sharedDataType#
        +
        +
        Type:
        +

        CUcigDataType

        +
        +
        +
        + +
        +
        +sharedData#
        +
        +
        Type:
        +

        Any

        +
        +
        +
        + +
        +
        +getPtr()#
        +

        Get memory address of class instance

        +
        + +

        + +
        +
        +class cuda.cuda.CUctxCreateParams#
        +

        Params for creating CUDA context. Exactly one of execAffinityParams +and cigParams must be non-NULL.

        +
        +
        +execAffinityParams#
        +
        +
        Type:
        +

        CUexecAffinityParam

        +
        +
        +
        + +
        +
        +numExecAffinityParams#
        +
        +
        Type:
        +

        int

        +
        +
        +
        + +
        +
        +cigParams#
        +
        +
        Type:
        +

        CUctxCigParam

        +
        +
        +
        + +
        +
        +getPtr()#
        +

        Get memory address of class instance

        +
        + +
        +
        class cuda.cuda.CUlibraryHostUniversalFunctionAndDataTable#
        @@ -20173,7 +20534,7 @@

        Data types used by CUDA driver
        -cuda.CUDA_VERSION = 12040#
        +cuda.CUDA_VERSION = 12050#

        CUDA API version number

        @@ -20268,6 +20629,11 @@

        Data types used by CUDA drivercuda.CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13#

        +
        +
        +cuda.CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14#
        +
        +
        cuda.CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1#
        @@ -20449,6 +20815,12 @@

        Data types used by CUDA driver +
        +cuda.CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = 256#
        +

        This flag indicates that the CUDA array will be used for hardware accelerated video encode/decode operations.

        +

        +
        cuda.CU_TRSA_OVERRIDE_FORMAT = 1#
        @@ -21147,6 +21519,12 @@

        Device ManagementCU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays.

        +
      • CU_DEVICE_ATTRIBUTE_NUMA_CONFIG: NUMA configuration of a +device: value is of type CUdeviceNumaConfig enum

      • +
      • CU_DEVICE_ATTRIBUTE_NUMA_ID: NUMA node ID of the GPU +memory

      • +
      • CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED: Device supports +switch multicast and reduction operations.

      Parameters:
      @@ -21697,11 +22075,139 @@

      Context ManagementCU_COREDUMP_ENABLE_USER_TRIGGER attribute to true globally.

    • -
    • CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory -operations initiated on this context will always synchronize. See -further documentation in the section titled “API Synchronization -behavior” to learn more about cases when synchronous memory -operations can exhibit asynchronous behavior.

    • +
    • CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory +operations initiated on this context will always synchronize. See +further documentation in the section titled “API Synchronization +behavior” to learn more about cases when synchronous memory +operations can exhibit asynchronous behavior.

    • +

    +

    Context creation will fail with CUDA_ERROR_UNKNOWN if the +compute mode of the device is CU_COMPUTEMODE_PROHIBITED. +The function cuDeviceGetAttribute() can be used with +CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute +mode of the device. The nvidia-smi tool can be used to set the +compute mode for * devices. Documentation for nvidia-smi can be +obtained by passing a -h option to it.

    +
    +
    Parameters:
    +
      +
    • flags (unsigned int) – Context creation flags

    • +
    • dev (CUdevice) – Device to create context on

    • +
    +
    +
    Returns:
    +

    +

    +
    +
    + +

    Notes

    +

    In most cases it is recommended to use cuDevicePrimaryCtxRetain.

    + + +
    +
    +cuda.cuda.cuCtxCreate_v3(list paramsArray: Optional[List[CUexecAffinityParam]], int numParams, unsigned int flags, dev)#
    +

    Create a CUDA context with execution affinity.

    +

    Creates a new CUDA context with execution affinity and associates it +with the calling thread. The paramsArray and flags parameter are +described below. The context is created with a usage count of 1 and the +caller of cuCtxCreate() must call +cuCtxDestroy() when done using the context. If a context is +already current to the thread, it is supplanted by the newly created +context and may be restored by a subsequent call to +cuCtxPopCurrent().

    +

    The type and the amount of execution resource the context can use is +limited by paramsArray and numParams. The paramsArray is an array +of CUexecAffinityParam and the numParams describes the size of the +array. If two CUexecAffinityParam in the array have the same type, +the latter execution affinity parameter overrides the former execution +affinity parameter. The supported execution affinity types are:
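As a rough sketch under stated assumptions (the nested param.smCount.val accessor mirrors the C struct layout, and SM-count affinity is only honored under Volta+ MPS, as the list that follows explains):

```python
from cuda import cuda

err, = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)

# Ask for a context restricted to (up to) 8 SMs; the driver may round this up,
# so the effective value should be re-queried with cuCtxGetExecAffinity.
affinity = cuda.CUexecAffinityParam()
affinity.type = cuda.CUexecAffinityType.CU_EXEC_AFFINITY_TYPE_SM_COUNT
affinity.param.smCount.val = 8

err, ctx = cuda.cuCtxCreate_v3([affinity], 1, 0, dev)
if err == cuda.CUresult.CUDA_SUCCESS:
    err, actual = cuda.cuCtxGetExecAffinity(
        cuda.CUexecAffinityType.CU_EXEC_AFFINITY_TYPE_SM_COUNT)
    err, = cuda.cuCtxDestroy(ctx)
```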

    +
      +
    • CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs +that the context can use. The portion of SMs is specified as the +number of SMs via CUexecAffinitySmCount. This limit will be +internally rounded up to the next hardware-supported amount. Hence, +it is imperative to query the actual execution affinity of the +context via cuCtxGetExecAffinity after context creation. Currently, +this attribute is only supported under Volta+ MPS.

    • +
    +

    The three LSBs of the flags parameter can be used to control how the +OS thread, which owns the CUDA context at the time of an API call, +interacts with the OS scheduler when waiting for results from the GPU. +Only one of the scheduling flags can be set when creating a context.

    +
      +
    • CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when +waiting for results from the GPU. This can decrease latency when +waiting for the GPU, but may lower the performance of CPU threads if +they are performing work in parallel with the CUDA thread.

    • +
    • CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread +when waiting for results from the GPU. This can increase latency when +waiting for the GPU, but can increase the performance of CPU threads +performing work in parallel with the GPU.

    • +
    • CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the +CPU thread on a synchronization primitive when waiting for the GPU to +finish work.

    • +
    • CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU +thread on a synchronization primitive when waiting for the GPU to +finish work. Deprecated: This flag was deprecated as of CUDA 4.0 +and was replaced with CU_CTX_SCHED_BLOCKING_SYNC.

    • +
    • CU_CTX_SCHED_AUTO: The default value if the flags +parameter is zero, uses a heuristic based on the number of active +CUDA contexts in the process C and the number of logical processors +in the system P. If C > P, then CUDA will yield to other OS +threads when waiting for the GPU (CU_CTX_SCHED_YIELD), +otherwise CUDA will not yield while waiting for results and actively +spin on the processor (CU_CTX_SCHED_SPIN). Additionally, +on Tegra devices, CU_CTX_SCHED_AUTO uses a heuristic +based on the power profile of the platform and may choose +CU_CTX_SCHED_BLOCKING_SYNC for low-powered devices.

    • +
    • CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned +allocations. This flag must be set in order to allocate pinned host +memory that is accessible to the GPU.

    • +
    • CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce +local memory after resizing local memory for a kernel. This can +prevent thrashing by local memory allocations when launching many +kernels with high local memory usage at the cost of potentially +increased memory usage. Deprecated: This flag is deprecated and the +behavior enabled by this flag is now the default and cannot be +disabled. Instead, the per-thread stack size can be controlled with +cuCtxSetLimit().

    • +
    • CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been +enabled globally with cuCoredumpSetAttributeGlobal or +environment variables, this flag can be set during context creation +to instruct CUDA to create a coredump if this context raises an +exception during execution. These environment variables are described +in the CUDA-GDB user guide under the “GPU core dump support” section. +The initial attributes will be taken from the global attributes at +the time of context creation. The other attributes that control +coredump output can be modified by calling +cuCoredumpSetAttribute from the created context after it +becomes current.

    • +
    • CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU +coredumps have not been enabled globally with +cuCoredumpSetAttributeGlobal or environment variables, +this flag can be set during context creation to instruct CUDA to +create a coredump if data is written to a certain pipe that is +present in the OS space. These environment variables are described in +the CUDA-GDB user guide under the “GPU core dump support” section. It +is important to note that the pipe name must be set with +cuCoredumpSetAttributeGlobal before creating the context +if this flag is used. Setting this flag implies that +CU_CTX_COREDUMP_ENABLE is set. The initial attributes +will be taken from the global attributes at the time of context +creation. The other attributes that control coredump output can be +modified by calling cuCoredumpSetAttribute from the +created context after it becomes current. Setting this flag on any +context creation is equivalent to setting the +CU_COREDUMP_ENABLE_USER_TRIGGER attribute to true +globally.

    Context creation will fail with CUDA_ERROR_UNKNOWN if the compute mode of the device is CU_COMPUTEMODE_PROHIBITED. @@ -21713,13 +22219,15 @@

    Context Management
    Parameters:
    Returns:

    @@ -21727,30 +22235,29 @@

    Context Management

    See also

    -

    cuCtxDestroy, cuCtxGetApiVersion, cuCtxGetCacheConfig, cuCtxGetDevice, cuCtxGetFlags, cuCtxGetLimit, cuCtxPopCurrent, cuCtxPushCurrent, cuCtxSetCacheConfig, cuCtxSetLimit, cuCoredumpSetAttributeGlobal, cuCoredumpSetAttribute, cuCtxSynchronize

    +

    cuCtxDestroy, cuCtxGetApiVersion, cuCtxGetCacheConfig, cuCtxGetDevice, cuCtxGetFlags, cuCtxGetLimit, cuCtxPopCurrent, cuCtxPushCurrent, cuCtxSetCacheConfig, cuCtxSetLimit, cuCtxSynchronize, cuCoredumpSetAttributeGlobal, cuCoredumpSetAttribute, CUexecAffinityParam

  • -

    Notes

    -

    In most cases it is recommended to use cuDevicePrimaryCtxRetain.

    -
    -cuda.cuda.cuCtxCreate_v3(list paramsArray: Optional[List[CUexecAffinityParam]], int numParams, unsigned int flags, dev)#
    -

    Create a CUDA context with execution affinity.

    -

    Creates a new CUDA context with execution affinity and associates it -with the calling thread. The paramsArray and flags parameter are -described below. The context is created with a usage count of 1 and the -caller of cuCtxCreate() must call +

    +cuda.cuda.cuCtxCreate_v4(CUctxCreateParams ctxCreateParams: Optional[CUctxCreateParams], unsigned int flags, dev)#
    +

    Create a CUDA context.

    +

    Creates a new CUDA context and associates it with the calling thread. +The flags parameter is described below. The context is created with a +usage count of 1 and the caller of cuCtxCreate() must call cuCtxDestroy() when done using the context. If a context is already current to the thread, it is supplanted by the newly created context and may be restored by a subsequent call to cuCtxPopCurrent().

    -

    The type and the amount of execution resource the context can use is -limited by paramsArray and numParams. The paramsArray is an array -of CUexecAffinityParam and the numParams describes the size of the -array. If two CUexecAffinityParam in the array have the same type, -the latter execution affinity parameter overrides the former execution -affinity parameter. The supported execution affinity types are:

    +

    CUDA context can be created with execution affinity. The type and the +amount of execution resource the context can use is limited by +paramsArray and numExecAffinityParams in execAffinity. The +paramsArray is an array of CUexecAffinityParam and the +numExecAffinityParams describes the size of the paramsArray. If two +CUexecAffinityParam in the array have the same type, the latter +execution affinity parameter overrides the former execution affinity +parameter. The supported execution affinity types are:

    • CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs that the context can use. The portion of SMs is specified as the @@ -21760,6 +22267,15 @@

      Context ManagementcuDeviceGetAttribute() with +CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED. Data from the graphics +client is shared with CUDA via the sharedData field in cigParams. For +D3D12, sharedData is an ID3D12CommandQueue handle.

      +

      Either execAffinityParams or cigParams can be set to a non-null +value. Setting both to a non-null value will result in undefined +behavior.

      The three LSBs of the flags parameter can be used to control how the OS thread, which owns the CUDA context at the time of an API call, interacts with the OS scheduler when waiting for results from the GPU. @@ -21811,7 +22327,8 @@

      Context ManagementcuCoredumpSetAttribute from the created context after it -becomes current.

    • +becomes current. This flag is not supported when CUDA context is +created in CIG(CUDA in Graphics) mode.

    • CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not been enabled globally with cuCoredumpSetAttributeGlobal or environment variables, @@ -21829,7 +22346,13 @@

      Context ManagementCU_COREDUMP_ENABLE_USER_TRIGGER attribute to true -globally.

    • +globally. This flag is not supported when CUDA context is created in +CIG(CUDA in Graphics) mode.

      +
    • CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory +operations initiated on this context will always synchronize. See +further documentation in the section titled “API Synchronization +behavior” to learn more about cases when synchronous memory +operations can exhibit asynchronous behavior.

    Context creation will fail with CUDA_ERROR_UNKNOWN if the compute mode of the device is CU_COMPUTEMODE_PROHIBITED. @@ -21838,18 +22361,25 @@

    Context ManagementCUDA_ERROR_NOT_SUPPORTED if CIG is not supported by the +device or the driver. Hardware support is available starting Volta. +Software support for graphics clients can be determined using +cuDeviceGetAttribute() with +CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED.

    Parameters:
      -
    • paramsArray (List[CUexecAffinityParam]) – Execution affinity parameters

    • -
    • numParams (int) – Number of execution affinity parameters

    • +
    • ctxCreateParams (CUctxCreateParams) – Context creation parameters

    • flags (unsigned int) – Context creation flags

    • dev (CUdevice) – Device to create context on

    Returns:

    @@ -21857,7 +22387,7 @@

    Context Management

    See also

    -

    cuCtxDestroy, cuCtxGetApiVersion, cuCtxGetCacheConfig, cuCtxGetDevice, cuCtxGetFlags, cuCtxGetLimit, cuCtxPopCurrent, cuCtxPushCurrent, cuCtxSetCacheConfig, cuCtxSetLimit, cuCtxSynchronize, cuCoredumpSetAttributeGlobal, cuCoredumpSetAttribute, CUexecAffinityParam

    +

    cuCtxDestroy, cuCtxGetApiVersion, cuCtxGetCacheConfig, cuCtxGetDevice, cuCtxGetFlags, cuCtxGetLimit, cuCtxPopCurrent, cuCtxPushCurrent, cuCtxSetCacheConfig, cuCtxSetLimit, cuCoredumpSetAttributeGlobal, cuCoredumpSetAttribute, cuCtxSynchronize

    @@ -21872,13 +22402,16 @@

    Context ManagementCUmodule, CUfunction, CUstream, +result in undefined behavior. These resources include CUDA types +CUmodule, CUfunction, CUstream, CUevent, CUarray, CUmipmappedArray, CUtexObject, CUsurfObject, CUtexref, CUsurfref, CUgraphicsResource, CUlinkState, -CUexternalMemory and CUexternalSemaphore.

    +CUexternalMemory and CUexternalSemaphore. These +resources also include memory allocations by cuMemAlloc(), +cuMemAllocHost(), cuMemAllocManaged() and +cuMemAllocPitch().

    If ctx is current to the calling thread then ctx will also be popped from the current thread’s context stack (as though cuCtxPopCurrent() were called). If ctx is current to @@ -21900,6 +22433,8 @@

    Context ManagementSee also

    cuCtxCreate, cuCtxGetApiVersion, cuCtxGetCacheConfig, cuCtxGetDevice, cuCtxGetFlags, cuCtxGetLimit, cuCtxPopCurrent, cuCtxPushCurrent, cuCtxSetCacheConfig, cuCtxSetLimit, cuCtxSynchronize

    +

    Notes

    +

    cuCtxDestroy() will not destroy memory allocations by cuMemCreate(), cuMemAllocAsync() and cuMemAllocFromPoolAsync(). These memory allocations are not associated with any CUDA context and need to be destroyed explicitly.

    @@ -22098,8 +22633,10 @@

    Context Management
    cuda.cuda.cuCtxSynchronize()#
    -

    Block for a context’s tasks to complete.

    -

    Blocks until the device has completed all preceding requested tasks. +

    Block for the current context’s tasks to complete.

    +

    Blocks until the current context has completed all preceding requested +tasks. If the current context is the primary context, green contexts +that have been created will also be synchronized. cuCtxSynchronize() returns an error if one of the preceding tasks failed. If the context was created with the CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will block @@ -22456,6 +22993,80 @@

    Context Management +
    +cuda.cuda.cuCtxRecordEvent(hCtx, hEvent)#
    +

    Records an event.

    +

    Captures in hEvent all the activities of the context hCtx at the +time of this call. hEvent and hCtx must be from the same CUDA +context, otherwise CUDA_ERROR_INVALID_HANDLE will be +returned. Calls such as cuEventQuery() or +cuCtxWaitEvent() will then examine or wait for completion +of the work that was captured. Uses of hCtx after this call do not +modify hEvent. If the context passed to hCtx is the primary +context, hEvent will capture all the activities of the primary +context and its green contexts. If the context passed to hCtx is a +context converted from green context via +cuCtxFromGreenCtx(), hEvent will capture only the +activities of the green context.

    +
    +
    Parameters:
    +
    +
    +
    Returns:
    +

    CUDA_SUCCESS CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED

    +
    +
    Return type:
    +

    CUresult

    +
    +
    + +

    Notes

    +

    The API will return CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED if the specified context hCtx has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures.

    +

    + +
    +
    +cuda.cuda.cuCtxWaitEvent(hCtx, hEvent)#
    +

    Make a context wait on an event.

    +

    Makes all future work submitted to context hCtx wait for all work +captured in hEvent. The synchronization will be performed on the +device and will not block the calling CPU thread. See +cuCtxRecordEvent() for details on what is captured by an +event. If the context passed to hCtx is the primary context, the +primary context and its green contexts will wait for hEvent. If the +context passed to hCtx is a context converted from green context via +cuCtxFromGreenCtx(), the green context will wait for +hEvent.

    +
    +
    Parameters:
    +
    +
    +
    Returns:
    +

    CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_HANDLE, CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED

    +
    +
    Return type:
    +

    CUresult

    +
    +
    + +

    Notes

    +

    hEvent may be from a different context or device than hCtx.

    +

    The API will return CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED and invalidate the capture if the specified event hEvent is part of an ongoing capture sequence or if the specified context hCtx has a stream in the capture mode.
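A minimal end-to-end sketch of the two new calls, assuming the 12.5 bindings above (error checks trimmed); the event is recorded and waited on in the same context, which satisfies the same-context requirement of cuCtxRecordEvent:

```python
from cuda import cuda

err, = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)
err, ctx = cuda.cuCtxCreate(0, dev)
err, event = cuda.cuEventCreate(0)        # 0 == CU_EVENT_DEFAULT

# Capture everything submitted to ctx so far ...
err, = cuda.cuCtxRecordEvent(ctx, event)
# ... and make all future work in ctx wait for the captured work (device-side wait).
err, = cuda.cuCtxWaitEvent(ctx, event)

err, = cuda.cuEventDestroy(event)
err, = cuda.cuCtxDestroy(ctx)
```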

    +
    +

    Module Management#

    @@ -22971,7 +23582,7 @@

    Library ManagementReturns:

    @@ -22981,6 +23592,8 @@

    Library ManagementSee also

    cuLibraryLoadFromFile, cuLibraryUnload, cuModuleLoad, cuModuleLoadData, cuModuleLoadDataEx

    +

    Notes

    +

    If the library contains managed variables and no device in the system supports managed variables this call is expected to return CUDA_ERROR_NOT_SUPPORTED

    @@ -23026,7 +23639,7 @@

    Library ManagementReturns:

    @@ -23036,6 +23649,8 @@

    Library ManagementSee also

    cuLibraryLoadData, cuLibraryUnload, cuModuleLoad, cuModuleLoadData, cuModuleLoadDataEx

    +

    Notes

    +

    If the library contains managed variables and no device in the system supports managed variables this call is expected to return CUDA_ERROR_NOT_SUPPORTED

    @@ -23185,6 +23800,30 @@

    Library Management +
    +cuda.cuda.cuKernelGetLibrary(kernel)#
    +

    Returns a library handle.

    +

    Returns in pLib the handle of the library for the requested kernel +kernel.
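A hedged usage sketch follows; the module file name "saxpy.cubin" and the kernel name "saxpy" are placeholders, not part of the patch:

```python
from cuda import cuda

err, = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)
err, ctx = cuda.cuCtxCreate(0, dev)

err, lib = cuda.cuLibraryLoadFromFile(b"saxpy.cubin", None, None, 0, None, None, 0)
err, kernel = cuda.cuLibraryGetKernel(lib, b"saxpy")

# New in 12.5: recover the owning library handle from a kernel handle
err, owner = cuda.cuKernelGetLibrary(kernel)

err, = cuda.cuLibraryUnload(lib)
err, = cuda.cuCtxDestroy(ctx)
```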

    +
    +
    Parameters:
    +

    kernel (CUkernel) – Kernel to retrieve library handle

    +
    +
    Returns:
    +

    +

    +
    +
    + +

    +
    cuda.cuda.cuLibraryGetGlobal(library, char *name)#
    @@ -23247,8 +23886,6 @@

    Library ManagementSee also

    cuLibraryLoadData, cuLibraryLoadFromFile, cuLibraryUnload

    -

    Notes

    -

    The API requires a CUDA context to be present and initialized on at least one device. If no context is present, the call returns CUDA_ERROR_NOT_FOUND.

    @@ -23429,6 +24066,9 @@

    Library ManagementCU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: +Indicates whether the function can be launched with non-portable +cluster size. 1 is allowed, 0 is disallowed.

  • CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block scheduling policy of a function. The value type is CUclusterSchedulingPolicy.

  • @@ -24238,9 +24878,10 @@

    Memory ManagementcuapiDeviceGetAttribute -with CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    +Windows is supported for compatibility purposes but not recommended as +it comes with a performance cost. Users can test their device for IPC +functionality by calling cuDeviceGetAttribute with +CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    Parameters:

    event (CUevent or cudaEvent_t) – Event allocated with CU_EVENT_INTERPROCESS and @@ -24275,9 +24916,10 @@

    Memory ManagementcuapiDeviceGetAttribute -with CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    +Windows is supported for compatibility purposes but not recommended as +it comes with a performance cost. Users can test their device for IPC +functionality by calling cuDeviceGetAttribute with +CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    Parameters:

    handle (CUipcEventHandle) – Interprocess handle to open

    @@ -24310,9 +24952,10 @@

    Memory ManagementcuapiDeviceGetAttribute -with CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    +Windows is supported for compatibility purposes but not recommended as +it comes with a performance cost. Users can test their device for IPC +functionality by calling cuDeviceGetAttribute with +CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    Parameters:

    dptr (CUdeviceptr) – Base pointer to previously allocated device memory

    @@ -24358,9 +25001,10 @@

    Memory ManagementcuapiDeviceGetAttribute -with CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    +Windows is supported for compatibility purposes but not recommended as +it comes with a performance cost. Users can test their device for IPC +functionality by calling cuDeviceGetAttribute with +CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED

    Parameters:

    cudaLaunchKernel (C++ API), cudaFuncSetCacheConfig (C++ API), cudaFuncGetAttributes (C API),

    @@ -7288,6 +7327,20 @@

    Stream Ordered Memory AllocatormaxSize. If set to 0, the maximum size of the pool will default to a system dependent value.

    +

    Applications can set handleTypes to +cudaMemHandleTypeFabric in order to create +cudaMemPool_t suitable for sharing within an IMEX domain. +An IMEX domain is either an OS instance or a group of securely +connected OS instances using the NVIDIA IMEX daemon. An IMEX channel is +a global resource within the IMEX domain that represents a logical +entity that aims to provide fine grained accessibility control for the +participating processes. When exporter and importer CUDA processes have +been granted access to the same IMEX channel, they can securely share +memory. If the allocating process does not have access setup for an +IMEX channel, attempting to export a CUmemoryPool with +cudaMemHandleTypeFabric will result in +cudaErrorNotPermitted. The nvidia-modprobe CLI provides +more information regarding setting up of IMEX channels.
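A sketch of creating such a pool from Python, assuming the runtime bindings above and a system already configured with IMEX channel access (otherwise the create/export path is expected to fail with cudaErrorNotPermitted):

```python
from cuda import cudart

props = cudart.cudaMemPoolProps()
props.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
props.handleTypes = cudart.cudaMemAllocationHandleType.cudaMemHandleTypeFabric
props.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
props.location.id = 0                      # device ordinal backing the pool

err, pool = cudart.cudaMemPoolCreate(props)
if err == cudart.cudaError_t.cudaSuccess:
    # Allocations from this pool can be exported with cudaMemPoolExportToShareableHandle
    # and imported by other processes in the same IMEX domain.
    err, = cudart.cudaMemPoolDestroy(pool)
```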

    Parameters:

    poolProps (cudaMemPoolProps) – None

    @@ -8695,7 +8748,8 @@

    Texture Object ManagementcudaSuccess and set the returned +funcPtr if the requested driver function is valid and supported on +the platform.

    +

    The API will return cudaSuccess and set the returned funcPtr to NULL if the requested driver function is not supported on the platform, no ABI compatible driver function exists for the CUDA runtime version or if the driver symbol is invalid.

    @@ -12396,6 +12453,92 @@

    Driver Entry Point Access +
    +cuda.cudart.cudaGetDriverEntryPointByVersion(char *symbol, unsigned int cudaVersion, unsigned long long flags)#
    +

    Returns the requested driver API function pointer by CUDA version.

    +

    Returns in **funcPtr the address of the CUDA driver function for the +requested flags and CUDA driver version.

    +

    The CUDA version is specified as (1000 * major + 10 * minor), so CUDA +11.2 should be specified as 11020. For a requested driver symbol, if +the specified CUDA version is greater than or equal to the CUDA version +in which the driver symbol was introduced, this API will return the +function pointer to the corresponding versioned function.

    +

    The pointer returned by the API should be cast to a function pointer +matching the requested driver function’s definition in the API header +file. The function pointer typedef can be picked up from the +corresponding typedefs header file. For example, cudaTypedefs.h +consists of function pointer typedefs for driver APIs defined in +cuda.h.

    +

    For the case where the CUDA version requested is greater than the CUDA +Toolkit installed, there may not be an appropriate function pointer +typedef in the corresponding header file and may need a custom typedef +to match the driver function signature returned. This can be done by +getting the typedefs from a later toolkit or creating appropriately +matching custom function typedefs.

    +

    The API will return cudaSuccess and set the returned +funcPtr if the requested driver function is valid and supported on +the platform.

    +

    The API will return cudaSuccess and set the returned +funcPtr to NULL if the requested driver function is not supported on +the platform, no ABI compatible driver function exists for the +requested version or if the driver symbol is invalid.

    +

    It will also set the optional driverStatus to one of the values in +cudaDriverEntryPointQueryResult with the following +meanings:

    + +

    The requested flags can be:

    +
      +
    • cudaEnableDefault: This is the default mode. This is +equivalent to cudaEnablePerThreadDefaultStream if the +code is compiled with –default-stream per-thread compilation flag or +the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined; +cudaEnableLegacyStream otherwise.

    • +
    • cudaEnableLegacyStream: This will enable the search for +all driver symbols that match the requested driver symbol name except +the corresponding per-thread versions.

    • +
    • cudaEnablePerThreadDefaultStream: This will enable the +search for all driver symbols that match the requested driver symbol +name including the per-thread versions. If a per-thread version is +not found, the API will return the legacy version of the driver +function.

    • +
    +
    +
    Parameters:
    +
      +
    • symbol (bytes) – The base name of the driver API function to look for. As an +example, for the driver API cuMemAlloc_v2, symbol +would be cuMemAlloc.

    • +
    • cudaVersion (unsigned int) – The CUDA version to look for the requested driver symbol

    • +
    • flags (unsigned long long) – Flags to specify search options.

    • +
    +
    +
    Returns:
    +

    +

    +
    +
    +
    +

    See also

    +

    cuGetProcAddress
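As an illustrative sketch (the symbol name and the 12050 version number are only examples), requesting the cuMemAlloc driver entry point for CUDA 12.5; the returned address would normally be wrapped via ctypes using the matching typedef from cudaTypedefs.h:

```python
from cuda import cudart

# flags = 0 corresponds to cudaEnableDefault
err, *out = cudart.cudaGetDriverEntryPointByVersion(b"cuMemAlloc", 12050, 0)
if err != cudart.cudaError_t.cudaSuccess:
    raise RuntimeError(f"entry point lookup failed: {err}")
func_ptr = out[0]   # address of the versioned driver function, or NULL if unsupported
```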

    +
    +

    +

    C++ API Routines#

    @@ -16868,7 +17011,7 @@

    Data types used by CUDA Runtime deviceUpdatableKernelNode#

    Value of launch attribute -::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE.

    +cudaLaunchAttributeDeviceUpdatableKernelNode.

    Type:

    anon_struct23

    @@ -16876,6 +17019,18 @@

    Data types used by CUDA Runtime +
    +sharedMemCarveout#
    +

    Value of launch attribute +cudaLaunchAttributePreferredSharedMemoryCarveout.

    +
    +
    Type:
    +

    unsigned int

    +
    +
    +

    +
    getPtr()#
    @@ -20486,7 +20641,7 @@

    Data types used by CUDA Runtime
    cudaDevAttrNumaConfig = 130#
    -

    NUMA configuration of a device: value is of type cudaDeviceNumaConfig enum

    +

    NUMA configuration of a device: value is of type cudaDeviceNumaConfig enum

    @@ -20512,9 +20667,15 @@

    Data types used by CUDA Runtime +
    +cudaDevAttrD3D12CigSupported = 135#
    +

    Device supports CIG with D3D12.

    +

    +
    -cudaDevAttrMax = 135#
    +cudaDevAttrMax = 136#
    @@ -20682,6 +20843,12 @@

    Data types used by CUDA Runtime +
    +cudaMemHandleTypeFabric = 8#
    +

    Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t)

    +
    +
    @@ -21459,12 +21626,19 @@

    Data types used by CUDA Runtime +
    +cudaLaunchAttributePreferredSharedMemoryCarveout = 14#
    +

    Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting sharedMemCarveout to a percentage between 0-100 sets the shared memory carveout preference in percent of the total shared memory for that kernel launch. This attribute takes precedence over cudaFuncAttributePreferredSharedMemoryCarveout. This is only a hint, and the driver can choose a different configuration if required for the launch.

    +

    +
    class cuda.cudart.cudaDeviceNumaConfig(value)#
    -
    +

    CUDA device NUMA config

    +
    cudaDeviceNumaConfigNone = 0#

    The GPU is not a NUMA node

    @@ -22481,7 +22655,7 @@

    Data types used by CUDA Runtime deviceUpdatableKernelNode#

    Value of launch attribute -::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE.

    +cudaLaunchAttributeDeviceUpdatableKernelNode.

    Type:

    anon_struct23

    @@ -22489,9 +22663,21 @@

    Data types used by CUDA Runtime +
    -getPtr()#
    +sharedMemCarveout# +

    Value of launch attribute +cudaLaunchAttributePreferredSharedMemoryCarveout.

    +
    +
    Type:
    +

    unsigned int

    +
    +
    +
    + +
    +
    +getPtr()#

    Get memory address of class instance

    @@ -23061,6 +23247,11 @@

    Data types used by CUDA Runtimecudart.cudaKernelNodeAttributeMemSyncDomain = 10#

    +
    +
    +cudart.cudaKernelNodeAttributePreferredSharedMemoryCarveout = 14#
    +
    +
    cudart.cudaKernelNodeAttributeDeviceUpdatableKernelNode = 13#
    diff --git a/docs/module/nvrtc.html b/docs/module/nvrtc.html index a5abe7ff..8ba4a243 100644 --- a/docs/module/nvrtc.html +++ b/docs/module/nvrtc.html @@ -7,7 +7,7 @@ - nvrtc - CUDA Python 12.4.0 documentation + nvrtc - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
      +
    • 12.5.0
    • 12.4.0
    • 12.3.0
    • 12.2.1
    • diff --git a/docs/motivation.html b/docs/motivation.html index 078d81fc..1aed632c 100644 --- a/docs/motivation.html +++ b/docs/motivation.html @@ -7,7 +7,7 @@ - Motivation - CUDA Python 12.4.0 documentation + Motivation - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
  • @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
      +
    • 12.5.0
    • 12.4.0
    • 12.3.0
    • 12.2.1
    • diff --git a/docs/objects.inv b/docs/objects.inv index 784642866d4551e66edc9306f5a5e95a365304f3..8ae58f86476485e879a5b2f7ed443ce4a9a81cb4 100644 GIT binary patch delta 30637 [base85-encoded binary delta payload omitted] diff --git a/docs/overview.html b/docs/overview.html index c5af7481..f6058234 100644 --- a/docs/overview.html +++ b/docs/overview.html @@ -7,7 +7,7 @@ - Overview - CUDA Python 12.4.0 documentation + Overview - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@

  • @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
  • diff --git a/docs/release.html b/docs/release.html index 1ea46a5b..99501de3 100644 --- a/docs/release.html +++ b/docs/release.html @@ -3,11 +3,11 @@ - + - Release Notes - CUDA Python 12.4.0 documentation + Release Notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    @@ -152,7 +152,7 @@
    - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
  • Code of Conduct
  • Contributing
  • Release Notes
      +
    • 12.5.0
    • 12.4.0
    • 12.3.0
    • 12.2.1
    • @@ -230,6 +231,14 @@

      Release Notes#

      @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.5.0-notes.html b/docs/release/11.5.0-notes.html index 1ec3dd66..9ed14ae2 100644 --- a/docs/release/11.5.0-notes.html +++ b/docs/release/11.5.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.5.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.5.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.6.0-notes.html b/docs/release/11.6.0-notes.html index 89ae8f2a..bddd1d5d 100644 --- a/docs/release/11.6.0-notes.html +++ b/docs/release/11.6.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.6.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.6.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.6.1-notes.html b/docs/release/11.6.1-notes.html index 77d4e372..3c6aab64 100644 --- a/docs/release/11.6.1-notes.html +++ b/docs/release/11.6.1-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.6.1 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.6.1 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.7.0-notes.html b/docs/release/11.7.0-notes.html index 5967de13..6e79ca69 100644 --- a/docs/release/11.7.0-notes.html +++ b/docs/release/11.7.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.7.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.7.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.7.1-notes.html b/docs/release/11.7.1-notes.html index 200ee172..d05d0169 100644 --- a/docs/release/11.7.1-notes.html +++ b/docs/release/11.7.1-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.7.1 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.7.1 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.8.0-notes.html b/docs/release/11.8.0-notes.html index a35bbbfa..06eac3e1 100644 --- a/docs/release/11.8.0-notes.html +++ b/docs/release/11.8.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.8.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.8.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.8.1-notes.html b/docs/release/11.8.1-notes.html index 5e4ed04d..8b5fb806 100644 --- a/docs/release/11.8.1-notes.html +++ b/docs/release/11.8.1-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.8.1 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.8.1 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.8.2-notes.html b/docs/release/11.8.2-notes.html index 2e235609..c2b9b984 100644 --- a/docs/release/11.8.2-notes.html +++ b/docs/release/11.8.2-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.8.2 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.8.2 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/11.8.3-notes.html b/docs/release/11.8.3-notes.html index 4bbeddee..94e08c8c 100644 --- a/docs/release/11.8.3-notes.html +++ b/docs/release/11.8.3-notes.html @@ -7,7 +7,7 @@ - CUDA Python 11.8.3 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 11.8.3 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/12.0.0-notes.html b/docs/release/12.0.0-notes.html index f1add95b..40d1264c 100644 --- a/docs/release/12.0.0-notes.html +++ b/docs/release/12.0.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 12.0.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 12.0.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/12.1.0-notes.html b/docs/release/12.1.0-notes.html index cee6563a..595e5d22 100644 --- a/docs/release/12.1.0-notes.html +++ b/docs/release/12.1.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 12.1.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 12.1.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/12.2.0-notes.html b/docs/release/12.2.0-notes.html index 77541c96..b1372386 100644 --- a/docs/release/12.2.0-notes.html +++ b/docs/release/12.2.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 12.2.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 12.2.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/12.2.1-notes.html b/docs/release/12.2.1-notes.html index 78ee9f76..15efc000 100644 --- a/docs/release/12.2.1-notes.html +++ b/docs/release/12.2.1-notes.html @@ -7,7 +7,7 @@ - CUDA Python 12.2.1 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 12.2.1 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • @@ -228,7 +229,7 @@

        CUDA Python 12.2.1 Release notes#

        -

        Released on January ??, 2024

        +

        Released on January 8, 2024

        Highlights#

          diff --git a/docs/release/12.3.0-notes.html b/docs/release/12.3.0-notes.html index 081792bd..09fbacee 100644 --- a/docs/release/12.3.0-notes.html +++ b/docs/release/12.3.0-notes.html @@ -7,7 +7,7 @@ - CUDA Python 12.3.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 12.3.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • diff --git a/docs/release/12.4.0-notes.html b/docs/release/12.4.0-notes.html index f5bdbf84..158b608b 100644 --- a/docs/release/12.4.0-notes.html +++ b/docs/release/12.4.0-notes.html @@ -3,11 +3,11 @@ - + - CUDA Python 12.4.0 Release notes - CUDA Python 12.4.0 documentation + CUDA Python 12.4.0 Release notes - CUDA Python 12.5.0 documentation @@ -125,7 +125,7 @@
    • @@ -152,7 +152,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation @@ -168,6 +168,7 @@
    • Code of Conduct
    • Contributing
    • Release Notes
        +
      • 12.5.0
      • 12.4.0
      • 12.3.0
      • 12.2.1
      • @@ -288,14 +289,14 @@

        CUDA Functions Not Supported in this Release - +
        Previous
        -
        Release Notes
        +
        CUDA Python 12.5.0 Release notes
        diff --git a/docs/release/12.5.0-notes.html b/docs/release/12.5.0-notes.html new file mode 100644 index 00000000..7b91be0d --- /dev/null +++ b/docs/release/12.5.0-notes.html @@ -0,0 +1,357 @@ + + + + + + + + + + CUDA Python 12.5.0 Release notes - CUDA Python 12.5.0 documentation + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + + + + + + + + + + +
        +
        +
        + +
        + +
        +
        + +
        + +
        +
        + +
        +
        +
        + + + + + Back to top + +
        + +
        + +
        + +
        +
        +
        +

        CUDA Python 12.5.0 Release notes#

        +

        Released on May 21, 2024

        +
        +

        Highlights#

        +
          +
        • Rebase to CUDA Toolkit 12.5

        • +
        • Resolve Issue #58: Interop between CUdeviceptr and Runtime (a short sketch follows this list)

        • +
        +
        +
        +
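The Issue #58 interop called out in the highlights is easiest to see in a short sketch. The snippet below is illustrative only and is not taken from this patch: it assumes the cuda-python bindings with the 12.5 interop changes, a CUDA-capable device, and numpy purely to supply a host buffer; error checking is elided and the buffer size and variable names are arbitrary.

```python
# Hedged sketch: pass a driver-API CUdeviceptr straight to a runtime-API call.
# Assumes cuda-python with the 12.5 interop changes plus numpy.
import numpy as np
from cuda import cuda, cudart

(err,) = cuda.cuInit(0)
err, dev = cuda.cuDeviceGet(0)
err, ctx = cuda.cuCtxCreate(0, dev)

host = np.arange(256, dtype=np.float32)
nbytes = host.nbytes

# Allocate with the driver API; dptr is a cuda.CUdeviceptr.
err, dptr = cuda.cuMemAlloc(nbytes)

# With the interop resolved, the runtime copy accepts the CUdeviceptr
# directly instead of requiring a manual int(dptr) conversion first.
(err,) = cudart.cudaMemcpy(dptr, host.ctypes.data, nbytes,
                           cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)

(err,) = cuda.cuMemFree(dptr)
(err,) = cuda.cuCtxDestroy(ctx)
```

If an older binding without this interop path is in use, the same copy should still work by passing int(dptr) explicitly as the destination address.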

        Limitations#

        +
        +

        CUDA Functions Not Supported in this Release#

        +
          +
        • Symbol APIs

          +
            +
          • cudaGraphExecMemcpyNodeSetParamsFromSymbol

          • +
          • cudaGraphExecMemcpyNodeSetParamsToSymbol

          • +
          • cudaGraphAddMemcpyNodeToSymbol

          • +
          • cudaGraphAddMemcpyNodeFromSymbol

          • +
          • cudaGraphMemcpyNodeSetParamsToSymbol

          • +
          • cudaGraphMemcpyNodeSetParamsFromSymbol

          • +
          • cudaMemcpyToSymbol

          • +
          • cudaMemcpyFromSymbol

          • +
          • cudaMemcpyToSymbolAsync

          • +
          • cudaMemcpyFromSymbolAsync

          • +
          • cudaGetSymbolAddress

          • +
          • cudaGetSymbolSize

          • +
          • cudaGetFuncBySymbol

          • +
          +
        • +
        • Launch Options

          +
            +
          • cudaLaunchKernel

          • +
          • cudaLaunchCooperativeKernel

          • +
          • cudaLaunchCooperativeKernelMultiDevice

          • +
          +
        • +
        • cudaSetValidDevices

        • +
        • cudaVDPAUSetVDPAUDevice

        • +
        • cudaFuncGetName

        • +
        • cudaFuncGetParamInfo

        • +
        +
        +
        +
        + +
        +
        + +
        + +
        +
        + + + + + + + \ No newline at end of file diff --git a/docs/search.html b/docs/search.html index 39da4db8..8970bcd1 100644 --- a/docs/search.html +++ b/docs/search.html @@ -5,7 +5,7 @@ - Search - CUDA Python 12.4.0 documentation + Search - CUDA Python 12.5.0 documentation @@ -122,7 +122,7 @@

    • @@ -149,7 +149,7 @@
      - CUDA Python 12.4.0 documentation + CUDA Python 12.5.0 documentation