From 31e160fa9a7eeb7b87f375c2aabbcf99994dda3f Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 29 Oct 2024 19:14:59 +0100 Subject: [PATCH 1/6] bump Enzyme to support threads without following the runtime activity --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c5c62a8f..a968624d 100644 --- a/Project.toml +++ b/Project.toml @@ -24,7 +24,7 @@ ParallelStencil_EnzymeExt = "Enzyme" AMDGPU = "0.6, 0.7, 0.8, 0.9, 1" CUDA = "3.12, 4, 5" CellArrays = "0.3" -Enzyme = "0.11, 0.12, 0.13" +Enzyme = "0.12, 0.13" MacroTools = "0.5" Polyester = "0.7" StaticArrays = "1" From 3d6cb19d2737d3ac01290e9960e4dc6666ebb031 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 29 Oct 2024 19:35:20 +0100 Subject: [PATCH 2/6] update documentation of module AD --- src/AD.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/AD.jl b/src/AD.jl index 90640f05..98b1421f 100644 --- a/src/AD.jl +++ b/src/AD.jl @@ -7,8 +7,8 @@ Provides GPU-compatible wrappers for automatic differentiation functions of the import ParallelStencil.AD # Functions -- `autodiff_deferred!`: wraps function `autodiff_deferred`. -- `autodiff_deferred_thunk!`: wraps function `autodiff_deferred_thunk`. +- `autodiff_deferred!`: wraps function `autodiff_deferred`, promoting all arguments that are not Enzyme.Annotations to Enzyme.Const. +- `autodiff_deferred_thunk!`: wraps function `autodiff_deferred_thunk`, promoting all arguments that are not Enzyme.Annotations to Enzyme.Const. # Examples const USE_GPU = true @@ -43,9 +43,6 @@ Provides GPU-compatible wrappers for automatic differentiation functions of the main() -!!! note "Enzyme runtime activity default" - If ParallelStencil is initialized with Threads, then `Enzyme.API.runtimeActivity!(true)` is called to ensure correct behavior of Enzyme. If you want to disable this behavior, then call `Enzyme.API.runtimeActivity!(false)` after loading ParallelStencil. - To see a description of a function type `?`. """ module AD From af5c97b270ca360f0f247e1033bd888956169721 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 29 Oct 2024 19:35:51 +0100 Subject: [PATCH 3/6] update documentation of module AD in PK --- src/ParallelKernel/EnzymeExt/AD.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ParallelKernel/EnzymeExt/AD.jl b/src/ParallelKernel/EnzymeExt/AD.jl index 7c1a9664..d3ef4a86 100644 --- a/src/ParallelKernel/EnzymeExt/AD.jl +++ b/src/ParallelKernel/EnzymeExt/AD.jl @@ -7,11 +7,8 @@ Provides GPU-compatible wrappers for automatic differentiation functions of the import ParallelKernel.AD # Functions -- `autodiff_deferred!`: wraps function `autodiff_deferred`. -- `autodiff_deferred_thunk!`: wraps function `autodiff_deferred_thunk`. - -!!! note "Enzyme runtime activity default" - If ParallelKernel is initialized with Threads, then `Enzyme.API.runtimeActivity!(true)` is called to ensure correct behavior of Enzyme. If you want to disable this behavior, then call `Enzyme.API.runtimeActivity!(false)` after loading ParallelStencil. +- `autodiff_deferred!`: wraps function `autodiff_deferred`, promoting all arguments that are not Enzyme.Annotations to Enzyme.Const. +- `autodiff_deferred_thunk!`: wraps function `autodiff_deferred_thunk`, promoting all arguments that are not Enzyme.Annotations to Enzyme.Const. To see a description of a function type `?`. """ From dc5b65f5789472ea015fe32df902f96cf10c2d85 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 29 Oct 2024 19:40:12 +0100 Subject: [PATCH 4/6] update autodiff_gpu --- src/ParallelKernel/EnzymeExt/autodiff_gpu.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ParallelKernel/EnzymeExt/autodiff_gpu.jl b/src/ParallelKernel/EnzymeExt/autodiff_gpu.jl index b500f01e..f086bfe8 100644 --- a/src/ParallelKernel/EnzymeExt/autodiff_gpu.jl +++ b/src/ParallelKernel/EnzymeExt/autodiff_gpu.jl @@ -2,16 +2,17 @@ import ParallelStencil import ParallelStencil: PKG_THREADS, PKG_POLYESTER import Enzyme +# NOTE: package specific initialization of Enzyme could be done as follows (not needed in the currently supported versions of Enzyme) # function ParallelStencil.ParallelKernel.AD.init_AD(package::Symbol) # if iscpu(package) # Enzyme.API.runtimeActivity!(true) # NOTE: this is currently required for Enzyme to work correctly with threads # end # end -# ParallelStencil injects a configuration parameter at the end, for Enzyme we need to wrap that parameter as a Annotation -# for all purposes this ought to be Const. This is not ideal since we might accidentially wrap other parameters the user -# provided as well. This is needed to support @parallel autodiff_deferred(...) - function promote_to_const(args::Vararg{Any,N}) where N +# NOTE: @parallel injects four parameters at the end, which need to be wrapped as Annotations. The current solution is to wrap all +# arguments which are not already Annotations (all the other arguments must be Annotations). Should this change, then one could +# explicitly wrap just the injected parameters. +function promote_to_const(args::Vararg{Any,N}) where N ntuple(Val(N)) do i @inline if !(args[i] isa Enzyme.Annotation || From e6221c6977ea85558f24d05d35a03107ada6c955 Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Tue, 29 Oct 2024 19:41:37 +0100 Subject: [PATCH 5/6] update autodiff_deferred! unit tests --- test/ParallelKernel/test_parallel.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/ParallelKernel/test_parallel.jl b/test/ParallelKernel/test_parallel.jl index e69d64c8..2f619538 100644 --- a/test/ParallelKernel/test_parallel.jl +++ b/test/ParallelKernel/test_parallel.jl @@ -23,7 +23,7 @@ Base.retry_load_extensions() # Potentially needed to load the extensions after t macro compute(A) esc(:($(INDICES[1]) + ($(INDICES[2])-1)*size($A,1))) end macro compute_with_aliases(A) esc(:(ix + (iz -1)*size($A,1))) end -import Enzyme + @static for package in TEST_PACKAGES eval(:( @testset "$(basename(@__FILE__)) (package: $(nameof($package)))" begin @testset "1. parallel macros" begin @@ -113,8 +113,8 @@ import Enzyme end return end - @parallel configcall=f!(A, B, a) AD.autodiff_deferred!(Enzyme.Reverse, Const(f!), Const, DuplicatedNoNeed(A, Ā), DuplicatedNoNeed(B, B̄), Const(a)) - Enzyme.autodiff_deferred(Enzyme.Reverse, Const(g!),Const, DuplicatedNoNeed(A_ref, Ā_ref), DuplicatedNoNeed(B_ref, B̄_ref), Const(a)) + @parallel configcall=f!(A, B, a) AD.autodiff_deferred!(Enzyme.Reverse, f!, Const, DuplicatedNoNeed(A, Ā), DuplicatedNoNeed(B, B̄), Const(a)) # NOTE: f! is automatically promoted to Const. + Enzyme.autodiff_deferred(Enzyme.Reverse, g!, Const, DuplicatedNoNeed(A_ref, Ā_ref), DuplicatedNoNeed(B_ref, B̄_ref), Const(a)) @test Array(Ā) ≈ Ā_ref @test Array(B̄) ≈ B̄_ref end From 85db9c34f78c796f50275d41f1a95bc1e0fb981b Mon Sep 17 00:00:00 2001 From: Samuel Omlin Date: Wed, 4 Dec 2024 15:39:06 +0100 Subject: [PATCH 6/6] add Const for Enzyme.autodiff_deferred function arg --- test/ParallelKernel/test_parallel.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ParallelKernel/test_parallel.jl b/test/ParallelKernel/test_parallel.jl index 2f619538..a8594e23 100644 --- a/test/ParallelKernel/test_parallel.jl +++ b/test/ParallelKernel/test_parallel.jl @@ -114,7 +114,7 @@ macro compute_with_aliases(A) esc(:(ix + (iz -1)*size($A,1) return end @parallel configcall=f!(A, B, a) AD.autodiff_deferred!(Enzyme.Reverse, f!, Const, DuplicatedNoNeed(A, Ā), DuplicatedNoNeed(B, B̄), Const(a)) # NOTE: f! is automatically promoted to Const. - Enzyme.autodiff_deferred(Enzyme.Reverse, g!, Const, DuplicatedNoNeed(A_ref, Ā_ref), DuplicatedNoNeed(B_ref, B̄_ref), Const(a)) + Enzyme.autodiff_deferred(Enzyme.Reverse, Const(g!), Const, DuplicatedNoNeed(A_ref, Ā_ref), DuplicatedNoNeed(B_ref, B̄_ref), Const(a)) @test Array(Ā) ≈ Ā_ref @test Array(B̄) ≈ B̄_ref end