From eadf7d996339c04ae5d7fa8278a2cd7d6115e406 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Wed, 25 Oct 2023 14:55:07 +0900 Subject: [PATCH 01/14] Handle ADOs vector to be consistent with the type of HEOMLS matrix --- src/ADOs.jl | 5 +++++ src/HierarchicalEOM.jl | 4 ++-- src/Spectrum.jl | 6 +++--- src/SteadyState.jl | 4 ++-- src/evolution.jl | 4 ++-- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/ADOs.jl b/src/ADOs.jl index 256d995d..9ce2b79d 100644 --- a/src/ADOs.jl +++ b/src/ADOs.jl @@ -206,4 +206,9 @@ function Expect(op, ados_list::Vector{ADOs}; take_real=true) else return exp_val end +end + +function HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: SparseMatrixCSC + TE = eltype(MatrixType) + return Vector{TE}(V) end \ No newline at end of file diff --git a/src/HierarchicalEOM.jl b/src/HierarchicalEOM.jl index a7c04dbb..d58bc621 100644 --- a/src/HierarchicalEOM.jl +++ b/src/HierarchicalEOM.jl @@ -69,7 +69,7 @@ module HierarchicalEOM export AbstractHEOMLSMatrix, M_S, M_Boson, M_Fermion, M_Boson_Fermion, AbstractParity, OddParity, EvenParity, value, ODD, EVEN, - ADOs, getRho, getADO, Expect, + ADOs, getRho, getADO, Expect, HandleVectorType, Nvec, AbstractHierarchyDict, HierarchyDict, MixHierarchyDict, getIndexEnsemble, Propagator, addBosonDissipator, addFermionDissipator, addTerminator, evolution, SteadyState @@ -80,7 +80,7 @@ module HierarchicalEOM # sub-module Spectrum for HierarchicalEOM module Spectrum - import ..HeomAPI: AbstractHEOMLSMatrix, OddParity, ADOs, spre + import ..HeomAPI: AbstractHEOMLSMatrix, OddParity, ADOs, spre, HandleVectorType import LinearAlgebra: I, kron import SparseArrays: sparse, sparsevec import LinearSolve: LinearProblem, init, solve!, UMFPACKFactorization diff --git a/src/Spectrum.jl b/src/Spectrum.jl index 4ad92f37..c2ac75ed 100644 --- a/src/Spectrum.jl +++ b/src/Spectrum.jl @@ -116,7 +116,7 @@ end # operator for calculating two-time correlation functions in frequency domain a_normal = kron(I_heom, spre(op)) a_dagger = kron(I_heom, spre(op')) - local X::Vector{ComplexF64} = a_normal * ados_vec + X = HandleVectorType(typeof(M.data), a_normal * ados_vec) Length = length(ω_list) Sω = Vector{Float64}(undef, Length) @@ -185,8 +185,8 @@ end # operators for calculating two-time correlation functions in frequency domain d_normal = kron(I_heom, spre(op)) d_dagger = kron(I_heom, spre(op')) - local X_m::Vector{ComplexF64} = d_normal * ados_vec - local X_p::Vector{ComplexF64} = d_dagger * ados_vec + X_m = HandleVectorType(typeof(M.data), d_normal * ados_vec) + X_p = HandleVectorType(typeof(M.data), d_dagger * ados_vec) Length = length(ω_list) Aω = Vector{Float64}(undef, Length) diff --git a/src/SteadyState.jl b/src/SteadyState.jl index e0cea1a7..01dbc8c1 100644 --- a/src/SteadyState.jl +++ b/src/SteadyState.jl @@ -33,7 +33,7 @@ For more details about solvers and extra options, please refer to [`LinearSolve. print("Solving steady state for auxiliary density operators...") flush(stdout) end - cache = init(LinearProblem(A, Vector(b)), solver, SOLVEROptions...) + cache = init(LinearProblem(A, HandleVectorType(typeof(M.data), b)), solver, SOLVEROptions...) sol = solve!(cache) if verbose println("[DONE]") @@ -145,7 +145,7 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ(t)/dt = L * ρ(t) L = MatrixOperator(M.data) - prob = ODEProblem(L, Vector(ados.data), (0, Inf)) + prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (0, Inf)) # solving steady state of the ODE problem if verbose diff --git a/src/evolution.jl b/src/evolution.jl index 19a31a2e..2270e6c2 100644 --- a/src/evolution.jl +++ b/src/evolution.jl @@ -268,7 +268,7 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ/dt = L * ρ(0) L = MatrixOperator(M.data) - prob = ODEProblem(L, Vector(ados.data), (tlist[1], tlist[end])) + prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end])) # setup integrator integrator = init( @@ -446,7 +446,7 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ/dt = L(t) * ρ(0) ## M.dim will check whether the returned time-dependent Hamiltonian has the correct dimension - prob = ODEProblem(L, Vector(ados.data), (tlist[1], tlist[end]), (M, H, param)) + prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end]), (M, H, param)) # setup integrator integrator = init( From a8141c796e53e6e3ac175de6bb1b6cc888babe25 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Thu, 26 Oct 2023 17:18:20 +0900 Subject: [PATCH 02/14] initial commit for CUDA support --- .github/workflows/Runtests.yml | 2 ++ Project.toml | 5 +++- ext/HierarchicalEOM_CUDAExt.jl | 51 ++++++++++++++++++++++++++++++++++ src/ADOs.jl | 10 +++++++ src/evolution.jl | 6 ++-- test/CUDA/CUDAExt.jl | 40 ++++++++++++++++++++++++++ test/CUDA/Project.toml | 5 ++++ test/runtests.jl | 12 +++++++- 8 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 ext/HierarchicalEOM_CUDAExt.jl create mode 100644 test/CUDA/CUDAExt.jl create mode 100644 test/CUDA/Project.toml diff --git a/.github/workflows/Runtests.yml b/.github/workflows/Runtests.yml index 36069c2f..96fd6126 100644 --- a/.github/workflows/Runtests.yml +++ b/.github/workflows/Runtests.yml @@ -21,6 +21,8 @@ jobs: include: - julia-version: '1' group: 'HierarchicalEOM_QOExt' + - julia-version: '1' + group: 'HierarchicalEOM_CUDAExt' steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 diff --git a/Project.toml b/Project.toml index 7e854328..68dd0e57 100644 --- a/Project.toml +++ b/Project.toml @@ -19,9 +19,11 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SteadyStateDiffEq = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" [weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" QuantumOptics = "6e0679c1-51ea-5a7c-ac74-d61b76210b0c" [extensions] +HierarchicalEOM_CUDAExt = "CUDA" HierarchicalEOM_QOExt = "QuantumOptics" [compat] @@ -43,9 +45,10 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" QuantumOptics = "6e0679c1-51ea-5a7c-ac74-d61b76210b0c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["BenchmarkTools", "Documenter", "LaTeXStrings", "Literate", "Pardiso", "Plots", "QuantumOptics", "Test"] +test = ["BenchmarkTools", "Documenter", "LaTeXStrings", "Literate", "Pardiso", "Pkg", "Plots", "QuantumOptics", "Test"] diff --git a/ext/HierarchicalEOM_CUDAExt.jl b/ext/HierarchicalEOM_CUDAExt.jl new file mode 100644 index 00000000..1460a063 --- /dev/null +++ b/ext/HierarchicalEOM_CUDAExt.jl @@ -0,0 +1,51 @@ +module HierarchicalEOM_CUDAExt + +using HierarchicalEOM +import HierarchicalEOM: HandleVectorType +import CUDA: cu, CuArray +import CUDA.CUSPARSE: CuSparseMatrixCSC +import SparseArrays: SparseVector + +@doc raw""" + cu(M::AbstractHEOMLSMatrix) +Return a new HEOMLS-matrix-type object with `M.data` is in the type of `CuSparseMatrixCSC{ComplexF32, Int32}` for gpu calculations. +""" +cu(M::AbstractHEOMLSMatrix) = CuSparseMatrixCSC(M) + +@doc raw""" + CuSparseMatrixCSC(M::AbstractHEOMLSMatrix) +Return a new HEOMLS-matrix-type object with `M.data` is in the type of `CuSparseMatrixCSC{ComplexF32, Int32}` for gpu calculations. +""" +function CuSparseMatrixCSC(M::T) where T <: AbstractHEOMLSMatrix + A = M.data + if typeof(A) <: CuSparseMatrixCSC + return M + else + colptr = CuArray{Int32}(A.colptr) + rowval = CuArray{Int32}(A.rowval) + nzval = CuArray{ComplexF32}(A.nzval) + A_gpu = CuSparseMatrixCSC{ComplexF32, Int32}(colptr, rowval, nzval, size(A)) + if T <: M_S + return M_S(A_gpu, M.tier, M.dim, M.N, M.sup_dim, M.parity) + elseif T <: M_Boson + return M_Boson(A_gpu, M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + elseif T <: M_Fermion + return M_Fermion(A_gpu, M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + else + return M_Boson_Fermion(A_gpu, M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) + end + end +end + +# for changing a `CuArray` back to `ADOs` +function HandleVectorType(V::T, cp::Bool=false) where T <: CuArray + return Vector{ComplexF64}(V) +end + +# for changing the type of `ADOs` to match the type of HEOMLS matrix +function HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: CuSparseMatrixCSC + TE = eltype(MatrixType) + return CuArray{TE}(V) +end + +end \ No newline at end of file diff --git a/src/ADOs.jl b/src/ADOs.jl index 9ce2b79d..0ff39739 100644 --- a/src/ADOs.jl +++ b/src/ADOs.jl @@ -208,6 +208,16 @@ function Expect(op, ados_list::Vector{ADOs}; take_real=true) end end +# for changing a `Vector` back to `ADOs` +function HandleVectorType(V::T, cp::Bool=true) where T <: Vector + if cp + return Vector{ComplexF64}(V) + else + return V + end +end + +# for changing the type of `ADOs` to match the type of HEOMLS matrix function HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: SparseMatrixCSC TE = eltype(MatrixType) return Vector{TE}(V) diff --git a/src/evolution.jl b/src/evolution.jl index 2270e6c2..df7ac160 100644 --- a/src/evolution.jl +++ b/src/evolution.jl @@ -268,7 +268,7 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ/dt = L * ρ(0) L = MatrixOperator(M.data) - prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end])) + prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end])) # setup integrator integrator = init( @@ -294,7 +294,7 @@ For more details about solvers and extra options, please refer to [`Differential step!(integrator, dt, true) # save the ADOs - ados = ADOs(copy(integrator.u), M.dim, M.N, M.parity) + ados = ADOs(HandleVectorType(integrator.u), M.dim, M.N, M.parity) push!(ADOs_list, ados) if SAVE @@ -472,7 +472,7 @@ For more details about solvers and extra options, please refer to [`Differential step!(integrator, dt, true) # save the ADOs - ados = ADOs(copy(integrator.u), M.dim, M.N, M.parity) + ados = ADOs(HandleVectorType(integrator.u), M.dim, M.N, M.parity) push!(ADOs_list, ados) if SAVE diff --git a/test/CUDA/CUDAExt.jl b/test/CUDA/CUDAExt.jl new file mode 100644 index 00000000..6f088780 --- /dev/null +++ b/test/CUDA/CUDAExt.jl @@ -0,0 +1,40 @@ +using HierarchicalEOM +using CUDA +using CUDA.CUSPARSE + +# re-define the bath (make the matrix smaller) +λ = 0.1450 +W = 0.6464 +kT = 0.7414 +μ = 0.8787 +N = 3 +tier = 2 + +# System Hamiltonian +Hsys = [ + 0.6969 0.4364; + 0.4364 0.3215 +] + +# system-bath coupling operator +Q = [ + 0.1234 0.1357 + 0.2468im; + 0.1357 - 0.2468im 0.5678 +] + +# initial state +ρ0 = [ + 1 0; + 0 0 +] + +Bbath = Boson_DrudeLorentz_Pade(Q, λ, W, kT, N) +Fbath = Fermion_Lorentz_Pade(Q, λ, μ, W, kT, N) + +L_cpu = M_Boson_Fermion(Hsys, tier, tier, Bbath, Fbath; verbose=false) +L_gpu = cu(L_cpu) + +println(L_cpu) +CUDA.@time ados_cpu = evolution(L_cpu, ρ0, [0f0, 10f0]; verbose=false) +CUDA.@time ados_gpu = evolution(L_gpu, ρ0, [0f0, 10f0]; verbose=false) +@test _is_Matrix_approx(getRho(ados_cpu[end]), getRho(ados_cpu[end])) \ No newline at end of file diff --git a/test/CUDA/Project.toml b/test/CUDA/Project.toml new file mode 100644 index 00000000..fd2d704e --- /dev/null +++ b/test/CUDA/Project.toml @@ -0,0 +1,5 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +HierarchicalEOM = "a62dbcb7-80f5-4d31-9a88-8b19fd92b128" +LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 22b7cc9b..cf6a5bf4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ +using Test, Pkg using QuantumOptics using HierarchicalEOM -using Test using SparseArrays using LinearAlgebra @@ -28,6 +28,16 @@ if GROUP == "All" || GROUP == "Core" include("phys_analysis.jl") end +#if GROUP == "HierarchicalEOM_CUDAExt" +if GROUP == "All" + Pkg.activate("CUDA") + Pkg.develop(PackageSpec(path = dirname(@__DIR__))) + Pkg.instantiate() + @testset "CUDA Extension" begin + include("CUDA/CUDAExt.jl") + end +end + if (GROUP == "All" || GROUP == "HierarchicalEOM_QOExt") && HAS_EXTENSIONS @testset "QuantumOptics Extension" begin include("QOExt.jl") From beee047335e82ec3cca74cca2cbb920268349074 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Thu, 26 Oct 2023 18:10:44 +0900 Subject: [PATCH 03/14] modify print text for HEOMLS and ADOs --- src/ADOs.jl | 2 +- src/heom_matrices/heom_matrix_base.jl | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ADOs.jl b/src/ADOs.jl index 0ff39739..5ae9918f 100644 --- a/src/ADOs.jl +++ b/src/ADOs.jl @@ -106,7 +106,7 @@ iterate(A::ADOs, ::Nothing) = nothing function show(io::IO, A::ADOs) print(io, - "Auxiliary Density Operators with $(A.parity), (system) dim = $(A.dim), number N = $(A.N)\n" + "$(A.N) Auxiliary Density Operators with $(A.parity) and (system) dim = $(A.dim)\n" ) end function show(io::IO, m::MIME"text/plain", A::ADOs) show(io, A) end diff --git a/src/heom_matrices/heom_matrix_base.jl b/src/heom_matrices/heom_matrix_base.jl index 178a6821..21627eeb 100644 --- a/src/heom_matrices/heom_matrix_base.jl +++ b/src/heom_matrices/heom_matrix_base.jl @@ -68,7 +68,8 @@ function show(io::IO, M::AbstractHEOMLSMatrix) end print(io, - type, " type HEOMLS matrix with system dim = $(M.dim) and parity = $(M.parity)\n", + type, " type HEOMLS matrix acting on $(M.parity) ADOs\n", + "system dim = $(M.dim)\n", "number of ADOs N = $(M.N)\n", "data =\n" ) From 27665e87f9c75fb89313081544d427ead0df2853 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Fri, 27 Oct 2023 15:42:31 +0900 Subject: [PATCH 04/14] improve type stability --- ext/HierarchicalEOM_CUDAExt.jl | 6 +-- src/ADOs.jl | 4 +- src/HierarchicalEOM.jl | 6 +-- src/Spectrum.jl | 29 +++++++----- src/SteadyState.jl | 29 +++++++----- src/evolution.jl | 8 ++-- src/heom_matrices/M_Boson.jl | 2 +- src/heom_matrices/M_Boson_Fermion.jl | 2 +- src/heom_matrices/M_Fermion.jl | 2 +- src/heom_matrices/heom_matrix_base.jl | 18 ++++---- test/CUDA/CUDAExt.jl | 66 +++++++++++++++++++-------- test/CUDA/Project.toml | 3 +- 12 files changed, 107 insertions(+), 68 deletions(-) diff --git a/ext/HierarchicalEOM_CUDAExt.jl b/ext/HierarchicalEOM_CUDAExt.jl index 1460a063..7a2ad68b 100644 --- a/ext/HierarchicalEOM_CUDAExt.jl +++ b/ext/HierarchicalEOM_CUDAExt.jl @@ -1,7 +1,7 @@ module HierarchicalEOM_CUDAExt using HierarchicalEOM -import HierarchicalEOM: HandleVectorType +import HierarchicalEOM.HeomAPI: _HandleVectorType import CUDA: cu, CuArray import CUDA.CUSPARSE: CuSparseMatrixCSC import SparseArrays: SparseVector @@ -38,12 +38,12 @@ function CuSparseMatrixCSC(M::T) where T <: AbstractHEOMLSMatrix end # for changing a `CuArray` back to `ADOs` -function HandleVectorType(V::T, cp::Bool=false) where T <: CuArray +function _HandleVectorType(V::T, cp::Bool=false) where T <: CuArray return Vector{ComplexF64}(V) end # for changing the type of `ADOs` to match the type of HEOMLS matrix -function HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: CuSparseMatrixCSC +function _HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: CuSparseMatrixCSC TE = eltype(MatrixType) return CuArray{TE}(V) end diff --git a/src/ADOs.jl b/src/ADOs.jl index 5ae9918f..64a8cbaa 100644 --- a/src/ADOs.jl +++ b/src/ADOs.jl @@ -209,7 +209,7 @@ function Expect(op, ados_list::Vector{ADOs}; take_real=true) end # for changing a `Vector` back to `ADOs` -function HandleVectorType(V::T, cp::Bool=true) where T <: Vector +function _HandleVectorType(V::T, cp::Bool=true) where T <: Vector if cp return Vector{ComplexF64}(V) else @@ -218,7 +218,7 @@ function HandleVectorType(V::T, cp::Bool=true) where T <: Vector end # for changing the type of `ADOs` to match the type of HEOMLS matrix -function HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: SparseMatrixCSC +function _HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: SparseMatrixCSC TE = eltype(MatrixType) return Vector{TE}(V) end \ No newline at end of file diff --git a/src/HierarchicalEOM.jl b/src/HierarchicalEOM.jl index d58bc621..5d59ee5e 100644 --- a/src/HierarchicalEOM.jl +++ b/src/HierarchicalEOM.jl @@ -69,7 +69,7 @@ module HierarchicalEOM export AbstractHEOMLSMatrix, M_S, M_Boson, M_Fermion, M_Boson_Fermion, AbstractParity, OddParity, EvenParity, value, ODD, EVEN, - ADOs, getRho, getADO, Expect, HandleVectorType, + ADOs, getRho, getADO, Expect, Nvec, AbstractHierarchyDict, HierarchyDict, MixHierarchyDict, getIndexEnsemble, Propagator, addBosonDissipator, addFermionDissipator, addTerminator, evolution, SteadyState @@ -80,9 +80,9 @@ module HierarchicalEOM # sub-module Spectrum for HierarchicalEOM module Spectrum - import ..HeomAPI: AbstractHEOMLSMatrix, OddParity, ADOs, spre, HandleVectorType + import ..HeomAPI: AbstractHEOMLSMatrix, OddParity, ADOs, spre, _HandleVectorType import LinearAlgebra: I, kron - import SparseArrays: sparse, sparsevec + import SparseArrays: sparse, sparsevec, SparseMatrixCSC import LinearSolve: LinearProblem, init, solve!, UMFPACKFactorization import ProgressMeter: Progress, next! import ..HeomBase: PROGBAR_OPTIONS, HandleMatrixType diff --git a/src/Spectrum.jl b/src/Spectrum.jl index c2ac75ed..9cf83c11 100644 --- a/src/Spectrum.jl +++ b/src/Spectrum.jl @@ -107,16 +107,16 @@ end end Size = size(M, 1) - I_total = sparse(I, Size, Size) - I_heom = sparse(I, M.N, M.N) + I_heom = sparse(one(ComplexF64) * I, M.N, M.N) + I_total = _HandleIdentityType(typeof(M.data), Size) # equal to : transpose(sparse(vec(system_identity_matrix))) - I_dual_vec = transpose(sparsevec([1 + n * (M.dim + 1) for n in 0:(M.dim - 1)], ones(M.dim), M.sup_dim)) + I_dual_vec = transpose(sparsevec([1 + n * (M.dim + 1) for n in 0:(M.dim - 1)], ones(ComplexF64, M.dim), M.sup_dim)) # operator for calculating two-time correlation functions in frequency domain a_normal = kron(I_heom, spre(op)) a_dagger = kron(I_heom, spre(op')) - X = HandleVectorType(typeof(M.data), a_normal * ados_vec) + X = _HandleVectorType(typeof(M.data), a_normal * ados_vec) Length = length(ω_list) Sω = Vector{Float64}(undef, Length) @@ -137,7 +137,7 @@ end end # trace over the Hilbert space of system (expectation value) - Sω[i] = -1 * real(I_dual_vec * (a_dagger * sol.u)[1:(M.sup_dim)]) + Sω[i] = -1 * real(I_dual_vec * (a_dagger * _HandleVectorType(sol.u, false))[1:(M.sup_dim)]) if SAVE open(FILENAME, "a") do file @@ -176,17 +176,17 @@ end end Size = size(M, 1) - I_total = sparse(I, Size, Size) - I_heom = sparse(I, M.N, M.N) + I_heom = sparse(one(ComplexF64) * I, M.N, M.N) + I_total = _HandleIdentityType(typeof(M.data), Size) # equal to : transpose(sparse(vec(system_identity_matrix))) - I_dual_vec = transpose(sparsevec([1 + n * (M.dim + 1) for n in 0:(M.dim - 1)], ones(M.dim), M.sup_dim)) + I_dual_vec = transpose(sparsevec([1 + n * (M.dim + 1) for n in 0:(M.dim - 1)], ones(ComplexF64, M.dim), M.sup_dim)) # operators for calculating two-time correlation functions in frequency domain d_normal = kron(I_heom, spre(op)) d_dagger = kron(I_heom, spre(op')) - X_m = HandleVectorType(typeof(M.data), d_normal * ados_vec) - X_p = HandleVectorType(typeof(M.data), d_dagger * ados_vec) + X_m = _HandleVectorType(typeof(M.data), d_normal * ados_vec) + X_p = _HandleVectorType(typeof(M.data), d_dagger * ados_vec) Length = length(ω_list) Aω = Vector{Float64}(undef, Length) @@ -211,8 +211,8 @@ end cache_p.A = M.data + Iω sol_p = solve!(cache_p) end - Cω_m = d_dagger * sol_m.u - Cω_p = d_normal * sol_p.u + Cω_m = d_dagger * _HandleVectorType(sol_m.u, false) + Cω_p = d_normal * _HandleVectorType(sol_p.u, false) # trace over the Hilbert space of system (expectation value) Aω[i] = -1 * ( @@ -235,4 +235,9 @@ end end return Aω +end + +function _HandleIdentityType(MatrixType::Type{TM}, S::Int) where TM <: SparseMatrixCSC + ElType = eltype(MatrixType) + return sparse(one(ElType) * I, S, S) end \ No newline at end of file diff --git a/src/SteadyState.jl b/src/SteadyState.jl index 01dbc8c1..29d2821c 100644 --- a/src/SteadyState.jl +++ b/src/SteadyState.jl @@ -19,13 +19,8 @@ For more details about solvers and extra options, please refer to [`LinearSolve. error("The parity of M should be \"EVEN\".") end - A = copy(M.data) - S = size(A, 1) - A[1,1:S] .= 0 - - # sparse(row_idx, col_idx, values, row_dims, col_dims) - A += sparse(fill(1, M.dim), [(n - 1) * (M.dim + 1) + 1 for n in 1:(M.dim)], fill(1, M.dim), S, S) - + S = size(M, 1) + A = _HandleSteadyStateMatrix(typeof(M.data), M, S) b = sparsevec([1], [1. + 0.0im], S) # solving x where A * x = b @@ -33,14 +28,14 @@ For more details about solvers and extra options, please refer to [`LinearSolve. print("Solving steady state for auxiliary density operators...") flush(stdout) end - cache = init(LinearProblem(A, HandleVectorType(typeof(M.data), b)), solver, SOLVEROptions...) + cache = init(LinearProblem(A, _HandleVectorType(typeof(M.data), b)), solver, SOLVEROptions...) sol = solve!(cache) if verbose println("[DONE]") flush(stdout) end - return ADOs(sol.u, M.dim, M.N, M.parity) + return ADOs(_HandleVectorType(sol.u, false), M.dim, M.N, M.parity) end @doc raw""" @@ -145,7 +140,9 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ(t)/dt = L * ρ(t) L = MatrixOperator(M.data) - prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (0, Inf)) + ElType = eltype(M) + tspan = Tuple{ElType, ElType}((0, Inf)) + prob = ODEProblem(L, _HandleVectorType(typeof(M.data), ados.data), tspan) # solving steady state of the ODE problem if verbose @@ -165,5 +162,15 @@ For more details about solvers and extra options, please refer to [`Differential flush(stdout) end - return ADOs(sol.u, M.dim, M.N, M.parity) + return ADOs(_HandleVectorType(sol.u, false), M.dim, M.N, M.parity) +end + +function _HandleSteadyStateMatrix(MatrixType::Type{TM}, M::AbstractHEOMLSMatrix, S::Int) where TM <: SparseMatrixCSC + ElType = eltype(M) + A = copy(M.data) + A[1,1:S] .= 0 + + # sparse(row_idx, col_idx, values, row_dims, col_dims) + A += sparse(ones(ElType, M.dim), [(n - 1) * (M.dim + 1) + 1 for n in 1:(M.dim)], ones(ElType, M.dim), S, S) + return A end \ No newline at end of file diff --git a/src/evolution.jl b/src/evolution.jl index df7ac160..335800de 100644 --- a/src/evolution.jl +++ b/src/evolution.jl @@ -268,7 +268,7 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ/dt = L * ρ(0) L = MatrixOperator(M.data) - prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end])) + prob = ODEProblem(L, _HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end])) # setup integrator integrator = init( @@ -294,7 +294,7 @@ For more details about solvers and extra options, please refer to [`Differential step!(integrator, dt, true) # save the ADOs - ados = ADOs(HandleVectorType(integrator.u), M.dim, M.N, M.parity) + ados = ADOs(_HandleVectorType(integrator.u), M.dim, M.N, M.parity) push!(ADOs_list, ados) if SAVE @@ -446,7 +446,7 @@ For more details about solvers and extra options, please refer to [`Differential # problem: dρ/dt = L(t) * ρ(0) ## M.dim will check whether the returned time-dependent Hamiltonian has the correct dimension - prob = ODEProblem(L, HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end]), (M, H, param)) + prob = ODEProblem(L, _HandleVectorType(typeof(M.data), ados.data), (tlist[1], tlist[end]), (M, H, param)) # setup integrator integrator = init( @@ -472,7 +472,7 @@ For more details about solvers and extra options, please refer to [`Differential step!(integrator, dt, true) # save the ADOs - ados = ADOs(HandleVectorType(integrator.u), M.dim, M.N, M.parity) + ados = ADOs(_HandleVectorType(integrator.u), M.dim, M.N, M.parity) push!(ADOs_list, ados) if SAVE diff --git a/src/heom_matrices/M_Boson.jl b/src/heom_matrices/M_Boson.jl index 5a8bb5ca..bbef6141 100644 --- a/src/heom_matrices/M_Boson.jl +++ b/src/heom_matrices/M_Boson.jl @@ -56,7 +56,7 @@ Note that the parity only need to be set as `ODD` when the system contains fermi _Hsys = HandleMatrixType(Hsys, 0, "Hsys (system Hamiltonian)") Nsys = size(_Hsys, 1) sup_dim = Nsys ^ 2 - I_sup = sparse(I, sup_dim, sup_dim) + I_sup = sparse(one(ComplexF64) * I, sup_dim, sup_dim) # the Liouvillian operator for free Hamiltonian term Lsys = -1im * (spre(_Hsys) - spost(_Hsys)) diff --git a/src/heom_matrices/M_Boson_Fermion.jl b/src/heom_matrices/M_Boson_Fermion.jl index 12650d57..5232d1d9 100644 --- a/src/heom_matrices/M_Boson_Fermion.jl +++ b/src/heom_matrices/M_Boson_Fermion.jl @@ -73,7 +73,7 @@ Note that the parity only need to be set as `ODD` when the system contains fermi _Hsys = HandleMatrixType(Hsys, 0, "Hsys (system Hamiltonian)") Nsys = size(_Hsys, 1) sup_dim = Nsys ^ 2 - I_sup = sparse(I, sup_dim, sup_dim) + I_sup = sparse(one(ComplexF64) * I, sup_dim, sup_dim) # the Liouvillian operator for free Hamiltonian term Lsys = -1im * (spre(_Hsys) - spost(_Hsys)) diff --git a/src/heom_matrices/M_Fermion.jl b/src/heom_matrices/M_Fermion.jl index 553443e0..a70246dc 100644 --- a/src/heom_matrices/M_Fermion.jl +++ b/src/heom_matrices/M_Fermion.jl @@ -54,7 +54,7 @@ Generate the fermion-type HEOM Liouvillian superoperator matrix _Hsys = HandleMatrixType(Hsys, 0, "Hsys (system Hamiltonian)") Nsys = size(_Hsys, 1) sup_dim = Nsys ^ 2 - I_sup = sparse(I, sup_dim, sup_dim) + I_sup = sparse(one(ComplexF64) * I, sup_dim, sup_dim) # the Liouvillian operator for free Hamiltonian term Lsys = -1im * (spre(_Hsys) - spost(_Hsys)) diff --git a/src/heom_matrices/heom_matrix_base.jl b/src/heom_matrices/heom_matrix_base.jl index 21627eeb..bec30bdd 100644 --- a/src/heom_matrices/heom_matrix_base.jl +++ b/src/heom_matrices/heom_matrix_base.jl @@ -132,11 +132,11 @@ function addBosonDissipator(M::T, jumpOP::Vector=[]) where T <: AbstractHEOMLSMa if T <: M_S return M_S(M.data + L, M.tier, M.dim, M.N, M.sup_dim, M.parity) elseif T <: M_Boson - return M_Boson(M.data + kron(sparse(I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + return M_Boson(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) elseif T <: M_Fermion - return M_Fermion(M.data + kron(sparse(I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + return M_Fermion(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) else - return M_Boson_Fermion(M.data + kron(sparse(I, M.N, M.N), L), M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) + return M_Boson_Fermion(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) end end end @@ -176,11 +176,11 @@ function addFermionDissipator(M::T, jumpOP::Vector=[]) where T <: AbstractHEOMLS if T <: M_S return M_S(M.data + L, M.tier, M.dim, M.N, M.sup_dim, M.parity) elseif T <: M_Boson - return M_Boson(M.data + kron(sparse(I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + return M_Boson(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) elseif T <: M_Fermion - return M_Fermion(M.data + kron(sparse(I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + return M_Fermion(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) else - return M_Boson_Fermion(M.data + kron(sparse(I, M.N, M.N), L), M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) + return M_Boson_Fermion(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) end end end @@ -231,11 +231,11 @@ function addTerminator(M::Mtype, Bath::Union{BosonBath, FermionBath}) where Mtyp ) if Mtype <: M_Boson - return M_Boson(M.data + kron(sparse(I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + return M_Boson(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) elseif Mtype <: M_Fermion - return M_Fermion(M.data + kron(sparse(I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) + return M_Fermion(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.tier, M.dim, M.N, M.sup_dim, M.parity, M.bath, M.hierarchy) else - return M_Boson_Fermion(M.data + kron(sparse(I, M.N, M.N), L), M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) + return M_Boson_Fermion(M.data + kron(sparse(one(ComplexF64) * I, M.N, M.N), L), M.Btier, M.Ftier, M.dim, M.N, M.sup_dim, M.parity, M.Bbath, M.Fbath, M.hierarchy) end end end diff --git a/test/CUDA/CUDAExt.jl b/test/CUDA/CUDAExt.jl index 6f088780..30361851 100644 --- a/test/CUDA/CUDAExt.jl +++ b/test/CUDA/CUDAExt.jl @@ -3,38 +3,66 @@ using CUDA using CUDA.CUSPARSE # re-define the bath (make the matrix smaller) -λ = 0.1450 -W = 0.6464 -kT = 0.7414 -μ = 0.8787 +λ = 0.01 +W = 0.5 +kT = 0.5 +μ = 0 N = 3 -tier = 2 +tier = 3 # System Hamiltonian Hsys = [ - 0.6969 0.4364; - 0.4364 0.3215 + 0 0; + 0 0 ] # system-bath coupling operator -Q = [ - 0.1234 0.1357 + 0.2468im; - 0.1357 - 0.2468im 0.5678 +Qb = [ + 0 1; + 1 0 +] +Qf = [ + 0 1; + 0 0 ] # initial state ρ0 = [ - 1 0; - 0 0 + 0 0; + 0 1 ] -Bbath = Boson_DrudeLorentz_Pade(Q, λ, W, kT, N) -Fbath = Fermion_Lorentz_Pade(Q, λ, μ, W, kT, N) +Bbath = Boson_DrudeLorentz_Pade(Qb, λ, W, kT, N) +Fbath = Fermion_Lorentz_Pade(Qf, λ, μ, W, kT, N) -L_cpu = M_Boson_Fermion(Hsys, tier, tier, Bbath, Fbath; verbose=false) +# Solving time Evolution +## Schrodinger HEOMLS +L_cpu = M_S(Hsys; verbose=false) L_gpu = cu(L_cpu) +ados_cpu = evolution(L_cpu, ρ0, [0f0, 10f0]; verbose=false) +ados_gpu = evolution(L_gpu, ρ0, [0f0, 10f0]; verbose=false) +@test _is_Matrix_approx(getRho(ados_cpu[end]), getRho(ados_cpu[end])) -println(L_cpu) -CUDA.@time ados_cpu = evolution(L_cpu, ρ0, [0f0, 10f0]; verbose=false) -CUDA.@time ados_gpu = evolution(L_gpu, ρ0, [0f0, 10f0]; verbose=false) -@test _is_Matrix_approx(getRho(ados_cpu[end]), getRho(ados_cpu[end])) \ No newline at end of file +## Boson HEOMLS +L_cpu = M_Boson(Hsys, tier, Bbath; verbose=false) +L_gpu = cu(L_cpu) +ados_cpu = evolution(L_cpu, ρ0, [0f0, 10f0]; verbose=false) +ados_gpu = evolution(L_gpu, ρ0, [0f0, 10f0]; verbose=false) +@test _is_Matrix_approx(getRho(ados_cpu[end]), getRho(ados_cpu[end])) + +## Boson Fermion HEOMLS +L_cpu = M_Fermion(Hsys, tier, Fbath; verbose=false) +L_gpu = cu(L_cpu) +ados_cpu = evolution(L_cpu, ρ0, [0f0, 10f0]; verbose=false) +ados_gpu = evolution(L_gpu, ρ0, [0f0, 10f0]; verbose=false) +@test _is_Matrix_approx(getRho(ados_cpu[end]), getRho(ados_cpu[end])) + +## Boson Fermion HEOMLS +L_cpu = M_Boson_Fermion(Hsys, tier, tier, Bbath, Fbath; verbose=false) +L_gpu = cu(L_cpu) +tlist = 0f0:1f0:10f0 +ados_cpu = evolution(L_cpu, ρ0, tlist; verbose=false) +ados_gpu = evolution(L_gpu, ρ0, tlist; verbose=false) +for i in 1:length(tlist) + @test _is_Matrix_approx(getRho(ados_cpu[i]), getRho(ados_cpu[i])) +end \ No newline at end of file diff --git a/test/CUDA/Project.toml b/test/CUDA/Project.toml index fd2d704e..6674b9ae 100644 --- a/test/CUDA/Project.toml +++ b/test/CUDA/Project.toml @@ -1,5 +1,4 @@ [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" HierarchicalEOM = "a62dbcb7-80f5-4d31-9a88-8b19fd92b128" -LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" \ No newline at end of file +LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" \ No newline at end of file From 4cb5e6c77dfb1d5d3f1f61c49be180ed06a4f4a2 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Fri, 27 Oct 2023 18:11:04 +0900 Subject: [PATCH 05/14] add tests for SteadyState solver in CUDAExt --- ext/HierarchicalEOM_CUDAExt.jl | 22 +++++++++++++++++++++- test/CUDA/CUDAExt.jl | 7 ++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/ext/HierarchicalEOM_CUDAExt.jl b/ext/HierarchicalEOM_CUDAExt.jl index 7a2ad68b..8c1dc4b1 100644 --- a/ext/HierarchicalEOM_CUDAExt.jl +++ b/ext/HierarchicalEOM_CUDAExt.jl @@ -1,7 +1,8 @@ module HierarchicalEOM_CUDAExt using HierarchicalEOM -import HierarchicalEOM.HeomAPI: _HandleVectorType +import HierarchicalEOM.HeomAPI: _HandleVectorType, _HandleSteadyStateMatrix +import HierarchicalEOM.Spectrum: _HandleIdentityType import CUDA: cu, CuArray import CUDA.CUSPARSE: CuSparseMatrixCSC import SparseArrays: SparseVector @@ -48,4 +49,23 @@ function _HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: Cu return CuArray{TE}(V) end +function _HandleSteadyStateMatrix(MatrixType::Type{TM}, M::AbstractHEOMLSMatrix, S::Int) where TM <: CuSparseMatrixCSC + colptr = Vector{Int32}(M.data.colptr) + rowval = Vector{Int32}(M.data.rowval) + nzval = Vector{ComplexF32}(M.data.rowval) + A = SparseMatrixCSC{ComplexF32, Int32}(S, S, colptr, rowval, nzval) + A[1,1:S] .= 0 + + # sparse(row_idx, col_idx, values, row_dims, col_dims) + A += sparse(ones(Int32, M.dim), [Int32((n - 1) * (M.dim + 1) + 1) for n in 1:(M.dim)], ones(ComplexF32, M.dim), S, S) + return CuSparseMatrixCSC(A) +end + +function _HandleIdentityType(MatrixType::Type{TM}, S::Int) where TM <: CuSparseMatrixCSC + colptr = CuArray{Int32}(Int32(1):Int32(S+1)) + rowval = CuArray{Int32}(Int32(1):Int32(S)) + nzval = CuArray{ComplexF32}(ones(ComplexF32, S)) + return CuSparseMatrixCSC{ComplexF32, Int32}(colptr, rowval, nzval, (S, S)) +end + end \ No newline at end of file diff --git a/test/CUDA/CUDAExt.jl b/test/CUDA/CUDAExt.jl index 30361851..d3e6ce49 100644 --- a/test/CUDA/CUDAExt.jl +++ b/test/CUDA/CUDAExt.jl @@ -1,6 +1,7 @@ using HierarchicalEOM using CUDA using CUDA.CUSPARSE +using LinearSolve # re-define the bath (make the matrix smaller) λ = 0.01 @@ -65,4 +66,8 @@ ados_cpu = evolution(L_cpu, ρ0, tlist; verbose=false) ados_gpu = evolution(L_gpu, ρ0, tlist; verbose=false) for i in 1:length(tlist) @test _is_Matrix_approx(getRho(ados_cpu[i]), getRho(ados_cpu[i])) -end \ No newline at end of file +end + +# Steady State +CUDA.@time ados_cpu = SteadyState(L_cpu; verbose=false) +CUDA.@time ados_gpu = SteadyState(L_gpu; solver=Krylov_GMRES(), verbose=false) \ No newline at end of file From 08cdc1e38d1bacad821a52c376a2ea382c8b7ce3 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 09:36:36 +0900 Subject: [PATCH 06/14] [bugfix] for CUDA extension --- ext/HierarchicalEOM_CUDAExt.jl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/ext/HierarchicalEOM_CUDAExt.jl b/ext/HierarchicalEOM_CUDAExt.jl index 8c1dc4b1..a96d0631 100644 --- a/ext/HierarchicalEOM_CUDAExt.jl +++ b/ext/HierarchicalEOM_CUDAExt.jl @@ -5,7 +5,7 @@ import HierarchicalEOM.HeomAPI: _HandleVectorType, _HandleSteadyStateMatrix import HierarchicalEOM.Spectrum: _HandleIdentityType import CUDA: cu, CuArray import CUDA.CUSPARSE: CuSparseMatrixCSC -import SparseArrays: SparseVector +import SparseArrays: sparse, SparseVector, SparseMatrixCSC @doc raw""" cu(M::AbstractHEOMLSMatrix) @@ -49,17 +49,18 @@ function _HandleVectorType(MatrixType::Type{TM}, V::SparseVector) where TM <: Cu return CuArray{TE}(V) end -function _HandleSteadyStateMatrix(MatrixType::Type{TM}, M::AbstractHEOMLSMatrix, S::Int) where TM <: CuSparseMatrixCSC - colptr = Vector{Int32}(M.data.colptr) - rowval = Vector{Int32}(M.data.rowval) - nzval = Vector{ComplexF32}(M.data.rowval) - A = SparseMatrixCSC{ComplexF32, Int32}(S, S, colptr, rowval, nzval) - A[1,1:S] .= 0 - - # sparse(row_idx, col_idx, values, row_dims, col_dims) - A += sparse(ones(Int32, M.dim), [Int32((n - 1) * (M.dim + 1) + 1) for n in 1:(M.dim)], ones(ComplexF32, M.dim), S, S) - return CuSparseMatrixCSC(A) -end +##### We first remove this part because there are errors when solveing steady states using GPU +# function _HandleSteadyStateMatrix(MatrixType::Type{TM}, M::AbstractHEOMLSMatrix, S::Int) where TM <: CuSparseMatrixCSC +# colptr = Vector{Int32}(M.data.colPtr) +# rowval = Vector{Int32}(M.data.rowVal) +# nzval = Vector{ComplexF32}(M.data.nzVal) +# A = SparseMatrixCSC{ComplexF32, Int32}(S, S, colptr, rowval, nzval) +# A[1,1:S] .= 0f0 +# +# # sparse(row_idx, col_idx, values, row_dims, col_dims) +# A += sparse(ones(Int32, M.dim), [Int32((n - 1) * (M.dim + 1) + 1) for n in 1:(M.dim)], ones(ComplexF32, M.dim), S, S) +# return CuSparseMatrixCSC(A) +# end function _HandleIdentityType(MatrixType::Type{TM}, S::Int) where TM <: CuSparseMatrixCSC colptr = CuArray{Int32}(Int32(1):Int32(S+1)) From 56ba85db4be1d80205baddb8f8160d4e1bfc47be Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 09:37:08 +0900 Subject: [PATCH 07/14] add basic tests for CUDA extension --- test/CUDA/CUDAExt.jl | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/test/CUDA/CUDAExt.jl b/test/CUDA/CUDAExt.jl index d3e6ce49..41acd0f0 100644 --- a/test/CUDA/CUDAExt.jl +++ b/test/CUDA/CUDAExt.jl @@ -1,6 +1,5 @@ using HierarchicalEOM using CUDA -using CUDA.CUSPARSE using LinearSolve # re-define the bath (make the matrix smaller) @@ -68,6 +67,33 @@ for i in 1:length(tlist) @test _is_Matrix_approx(getRho(ados_cpu[i]), getRho(ados_cpu[i])) end -# Steady State -CUDA.@time ados_cpu = SteadyState(L_cpu; verbose=false) -CUDA.@time ados_gpu = SteadyState(L_gpu; solver=Krylov_GMRES(), verbose=false) \ No newline at end of file +# SIAM +ϵ = -5 +U = 10 +σm = [0 1; 0 0] ## σ- +σz = [1 0; 0 -1] ## σz +II = [1 0; 0 1] ## identity matrix +d_up = kron( σm, II) +d_dn = kron(-1 * σz, σm) +Hsys = ϵ * (d_up' * d_up + d_dn' * d_dn) + U * (d_up' * d_up * d_dn' * d_dn) +Γ = 2 +μ = 0 +W = 10 +kT = 0.5 +N = 5 +tier = 3 +bath_up = Fermion_Lorentz_Pade(d_up, Γ, μ, W, kT, N) +bath_dn = Fermion_Lorentz_Pade(d_dn, Γ, μ, W, kT, N) +bath_list = [bath_up, bath_dn] + +## solve density of states +ωlist = -10:1:10 +L_cpu = M_Fermion(Hsys, tier, bath_list; verbose=false) +L_odd_cpu = M_Fermion(Hsys, tier, bath_list, ODD; verbose=false) +L_odd_gpu = cu(L_odd_cpu) +ados_cpu = SteadyState(L_cpu; verbose=false) +dos_cpu = spectrum(L_odd_cpu, ados_cpu, d_up, ωlist; verbose=false) +dos_gpu = spectrum(L_odd_gpu, ados_cpu, d_up, ωlist; solver=KrylovJL_BICGSTAB(rtol=1f-10, atol=1f-12), verbose=false) +for (i, ω) in enumerate(ωlist) + @test dos_cpu[i] ≈ dos_gpu[i] atol = 1e-6 +end \ No newline at end of file From efeec052fe15606d78a0ef978603c2e6a70d1a23 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 15:43:45 +0900 Subject: [PATCH 08/14] improve CUDA extension --- ext/HierarchicalEOM_CUDAExt.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/HierarchicalEOM_CUDAExt.jl b/ext/HierarchicalEOM_CUDAExt.jl index a96d0631..f428a247 100644 --- a/ext/HierarchicalEOM_CUDAExt.jl +++ b/ext/HierarchicalEOM_CUDAExt.jl @@ -3,6 +3,7 @@ module HierarchicalEOM_CUDAExt using HierarchicalEOM import HierarchicalEOM.HeomAPI: _HandleVectorType, _HandleSteadyStateMatrix import HierarchicalEOM.Spectrum: _HandleIdentityType +import CUDA import CUDA: cu, CuArray import CUDA.CUSPARSE: CuSparseMatrixCSC import SparseArrays: sparse, SparseVector, SparseMatrixCSC @@ -65,7 +66,7 @@ end function _HandleIdentityType(MatrixType::Type{TM}, S::Int) where TM <: CuSparseMatrixCSC colptr = CuArray{Int32}(Int32(1):Int32(S+1)) rowval = CuArray{Int32}(Int32(1):Int32(S)) - nzval = CuArray{ComplexF32}(ones(ComplexF32, S)) + nzval = CUDA.ones(ComplexF32, S) return CuSparseMatrixCSC{ComplexF32, Int32}(colptr, rowval, nzval, (S, S)) end From 5bf2a1c31f0784ba7f44a5f2f9cb0714a78c63dc Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 15:44:01 +0900 Subject: [PATCH 09/14] fix runtests --- test/runtests.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index cf6a5bf4..063e001b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -28,8 +28,7 @@ if GROUP == "All" || GROUP == "Core" include("phys_analysis.jl") end -#if GROUP == "HierarchicalEOM_CUDAExt" -if GROUP == "All" +if GROUP == "HierarchicalEOM_CUDAExt" Pkg.activate("CUDA") Pkg.develop(PackageSpec(path = dirname(@__DIR__))) Pkg.instantiate() From 7f50a9f629e76ad40c67311a8ac08a7b970dab26 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 15:44:21 +0900 Subject: [PATCH 10/14] update docs for CUDA extension --- docs/Project.toml | 2 + docs/make.jl | 3 +- docs/src/extensions/CUDA.md | 108 ++++++++++++++++++++++++++++++++++++ docs/src/spectrum.md | 3 + docs/src/time_evolution.md | 4 ++ 5 files changed, 119 insertions(+), 1 deletion(-) create mode 100644 docs/src/extensions/CUDA.md diff --git a/docs/Project.toml b/docs/Project.toml index edb712e6..870a3232 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,6 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" HierarchicalEOM = "a62dbcb7-80f5-4d31-9a88-8b19fd92b128" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" @@ -12,6 +13,7 @@ QuantumOptics = "6e0679c1-51ea-5a7c-ac74-d61b76210b0c" [compat] BenchmarkTools = "1.3" +CUDA = "5" Documenter = "0.27, 1" HierarchicalEOM = "1" LaTeXStrings = "1" diff --git a/docs/make.jl b/docs/make.jl index 733983da..cf2d1570 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -77,7 +77,8 @@ const PAGES = Any[ "Examples" => EX_output_files, "Benchmark Solvers" => BM_output_files, "Extensions" => Any[ - "QuantumOptics.jl" => "extensions/QuantumOptics.md" + "QuantumOptics.jl" => "extensions/QuantumOptics.md", + "CUDA.jl" => "extensions/CUDA.md" ] ], "Library API" => "libraryAPI.md" diff --git a/docs/src/extensions/CUDA.md b/docs/src/extensions/CUDA.md new file mode 100644 index 00000000..2a7d194e --- /dev/null +++ b/docs/src/extensions/CUDA.md @@ -0,0 +1,108 @@ +# [Extension for CUDA.jl](@id doc-ext-CUDA) + +This is an extension to support GPU ([`CUDA.jl`](https://github.com/JuliaGPU/CUDA.jl)) acceleration for solving the [time evolution](@ref doc-Time-Evolution) and [spectrum](@ref doc-Spectrum). This improves the execution time and memory usage especially when the HEOMLS matrix is super large. + +!!! compat "Compat" + The described feature requires `Julia 1.9+`. + +The functions [`evolution`](@ref doc-Time-Evolution) (only supports ODE method with time-independent system Hamiltonian) and [`spectrum`](@ref doc-Spectrum) will automatically choose to solve on CPU or GPU depend on the type of the sparse matrix in `M::AbstractHEOMLSMatrix` objects (i.e., the type of the field `M.data`). +```julia +typeof(M.data) <: SparseMatrixCSC # solve on CPU +typeof(M.data) <: CuSparseMatrixCSC # solve on GPU +``` + +Therefore, we wrapped several functions in `CUDA` and `CUDA.CUSPARSE` in order to return a new HEOMLS-matrix-type object with `M.data` is in the type of `CuSparseMatrix`, and also change the element type into `ComplexF32` and `Int32` (since GPU performs better in this type). The functions are listed as follows: +- `cu(M::AbstractHEOMLSMatrix)` : Translate `M.data` into the type `CuSparseMatrixCSC{ComplexF32, Int32}` +- `CuSparseMatrixCSC(M::AbstractHEOMLSMatrix)` : Translate `M.data` into the type `CuSparseMatrixCSC{ComplexF32, Int32}` + +### Demonstration + +The extension will be automatically loaded if user imports the package `CUDA.jl` : + +````@example CUDA_Ext_example +using BenchmarkTools +using CUDA +using HierarchicalEOM +using LinearSolve # to change the solver for better GPU performance +using Plots +```` + +### Check version info. of `HierarchicalEOM.jl` + +````@example CUDA_Ext_example +HierarchicalEOM.versioninfo() +```` + +### Check version info. of `CUDA.jl` + +````@example CUDA_Ext_example +CUDA.versioninfo() +```` + +### Setup + +Here, we demonstrate this extension by using the example of [the single-impurity Anderson model](@ref exp-SIAM). + +````@example CUDA_Ext_example +ϵ = -5 +U = 10 +Γ = 2 +μ = 0 +W = 10 +kT = 0.5 +N = 5 +tier = 3 + +tlist = 0f0:1f-1:10f0 # same as 0:0.1:10 but in the type of `Float32` +ωlist = -10f0:1f0:10f0 # same as -10:1:10 but in the type of `Float32` + +σm = [0 1; 0 0] +σz = [1 0; 0 -1] +II = [1 0; 0 1] +d_up = kron( σm, II) +d_dn = kron(-1 * σz, σm) +ρ0 = kron([1 0; 0 0], [1 0; 0 0]) +Hsys = ϵ * (d_up' * d_up + d_dn' * d_dn) + U * (d_up' * d_up * d_dn' * d_dn) + +bath_up = Fermion_Lorentz_Pade(d_up, Γ, μ, W, kT, N) +bath_dn = Fermion_Lorentz_Pade(d_dn, Γ, μ, W, kT, N) +bath_list = [bath_up, bath_dn] + +# even HEOMLS matrix +M_even_cpu = M_Fermion(Hsys, tier, bath_list; verbose=false) +M_even_gpu = cu(M_even_cpu) + +# odd HEOMLS matrix +M_odd_cpu = M_Fermion(Hsys, tier, bath_list, ODD; verbose=false) +M_odd_gpu = cu(M_odd_cpu) + +# solve steady state with CPU +ados_ss = SteadyState(M_even_cpu); +```` + +!!! note "Note" + This extension does not support for solving [`SteadyState`](@ref doc-Stationary-State) on GPU since it is not efficient and might get wrong solutions. If you really want to obtain the stationary state with GPU, you can repeatedly solve the [`evolution`](@ref doc-Time-Evolution) until you find it. + +### Solving time evolution with CPU + +````@example CUDA_Ext_example +@benchmark ados_list_cpu = evolution(M_even_cpu, ρ0, tlist; verbose=false) +```` + +### Solving time evolution with GPU + +````@example CUDA_Ext_example +@benchmark ados_list_gpu = evolution(M_even_gpu, ρ0, tlist; verbose=false) +```` + +### Solving Spectrum with CPU + +````@example CUDA_Ext_example +@benchmark dos_cpu = spectrum(M_odd_cpu, ados_ss, d_up, ωlist; verbose=false) +```` + +### Solving Spectrum with GPU + +````@example CUDA_Ext_example +@benchmark dos_gpu = spectrum(M_odd_gpu, ados_ss, d_up, ωlist; solver=KrylovJL_BICGSTAB(rtol=1f-10, atol=1f-12), verbose=false) +```` \ No newline at end of file diff --git a/docs/src/spectrum.md b/docs/src/spectrum.md index d47edb64..ccee275a 100644 --- a/docs/src/spectrum.md +++ b/docs/src/spectrum.md @@ -13,6 +13,9 @@ The function [`spectrum`](@ref) will automatically detect the [parity](@ref doc- `HierarchicalEOM.jl` wraps some of the functions in [LinearSolve.jl](http://linearsolve.sciml.ai/stable/), which is a very rich numerical library for solving the linear problems and provides many solvers. It offers quite a few options for the user to tailor the solver to their specific needs. The default solver (and its corresponding settings) are chosen to suit commonly encountered problems and should work fine for most of the cases. If you require more specialized methods, such as the choice of algorithm, please refer to [benchmark for LinearSolve solvers](@ref benchmark-LS-solvers) and also the documentation of [LinearSolve.jl](http://linearsolve.sciml.ai/stable/). +!!! compat "Extension for CUDA.jl" + `HierarchicalEOM.jl` provides an extension to support GPU ([`CUDA.jl`](https://github.com/JuliaGPU/CUDA.jl)) acceleration for solving the spectrum, but this feature requires `Julia 1.9+` and `HierarchicalEOM 1.1+`. See [here](@ref doc-ext-CUDA) for more details. + ## [Power Spectral Density](@id doc-PSD) Start from the spectrum for bosonic systems (power spectral density) in the time-domain. We write the system two-time correlation function in terms of the propagator ``\hat{\mathcal{G}}(t)=\exp(\hat{\mathcal{M}} t)`` for ``t>0``. The power spectral density ``S(\omega)`` can be obtained as ```math diff --git a/docs/src/time_evolution.md b/docs/src/time_evolution.md index 45f19ad0..8c6f9a9a 100644 --- a/docs/src/time_evolution.md +++ b/docs/src/time_evolution.md @@ -53,6 +53,10 @@ end ## Ordinary Differential Equation Method The first method is implemented by solving the ordinary differential equation (ODE) as shown above. `HierarchicalEOM.jl` wraps some of the functions in [`DifferentialEquations.jl`](https://diffeq.sciml.ai/stable/), which is a very rich numerical library for solving the differential equations and provides many ODE solvers. It offers quite a few options for the user to tailor the solver to their specific needs. The default solver (and its corresponding settings) are chosen to suit commonly encountered problems and should work fine for most of the cases. If you require more specialized methods, such as the choice of algorithm, please refer to [benchmarks for DifferentialEquations solvers](@ref benchmark-ODE-solvers) and also the documentation of [`DifferentialEquations.jl`](https://diffeq.sciml.ai/stable/). +!!! compat "Extension for CUDA.jl" + `HierarchicalEOM.jl` provides an extension to support GPU ([`CUDA.jl`](https://github.com/JuliaGPU/CUDA.jl)) acceleration for solving the time evolution (only for ODE method with time-independent system Hamiltonian), but this feature requires `Julia 1.9+` and `HierarchicalEOM 1.1+`. See [here](@ref doc-ext-CUDA) for more details. + + ### Given the initial state as Density Operator (`AbstractMatrix` type) See the docstring of this method: From 67ecb5e89f970eab3a7165f0af3ede8e31d639de Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 21:32:30 +0900 Subject: [PATCH 11/14] fix docs and CI --- .buildkite/pipeline.yml | 19 +++++++++++++++ .github/workflows/Runtests.yml | 2 -- docs/src/extensions/CUDA.md | 42 ++++++++++++---------------------- 3 files changed, 33 insertions(+), 30 deletions(-) create mode 100644 .buildkite/pipeline.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 00000000..807314b5 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,19 @@ +steps: + - label: "HierarchicalEOM_CUDAExt" + plugins: + - JuliaCI/julia#v1: + version: "1" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" + - JuliaCI/julia-coverage#v1: + dirs: + - src + agents: + queue: "juliagpu" + cuda: "*" + env: + GROUP: "HierarchicalEOM_CUDAExt" + SECRET_CODECOV_TOKEN: "kaIXEN51HinaQ4JGclQcIgxeMMtXDb5uvnP3E2eKrH4Eruf2pKd5QwUGcIVL8+rcWeo5FWj883rNxRQEH3YeCWs6/i7vzs+ORvG51QeCNYQgNqFzPsWRcq5qJYc+JPFbisS7q9nghqWTwr52cnjarD4Xx3ceGorMyS5NvFpCNxMgqHNyGkLvipxcTTJfKZK61bpnbntoIjiIO1XSZKjcxnXFGFnolV9BHCr5v8f7F42n2tUH7X3nDHmTBr1AbO2lFAU9ra/KezHcIf0wg2HcV8LZD0+mj8q/SBPjQZSH7cxwx4Q2eTjT4Sw7xnrBGuySVm8ZPCAV7nRNEHo+VqR+GQ==" + timeout_in_minutes: 30 + # Don't run Buildkite if the commit message includes the text [skip tests] + if: build.message !~ /\[skip tests\]/ \ No newline at end of file diff --git a/.github/workflows/Runtests.yml b/.github/workflows/Runtests.yml index 96fd6126..36069c2f 100644 --- a/.github/workflows/Runtests.yml +++ b/.github/workflows/Runtests.yml @@ -21,8 +21,6 @@ jobs: include: - julia-version: '1' group: 'HierarchicalEOM_QOExt' - - julia-version: '1' - group: 'HierarchicalEOM_CUDAExt' steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v1 diff --git a/docs/src/extensions/CUDA.md b/docs/src/extensions/CUDA.md index 2a7d194e..49ec42e6 100644 --- a/docs/src/extensions/CUDA.md +++ b/docs/src/extensions/CUDA.md @@ -20,30 +20,16 @@ Therefore, we wrapped several functions in `CUDA` and `CUDA.CUSPARSE` in order t The extension will be automatically loaded if user imports the package `CUDA.jl` : ````@example CUDA_Ext_example -using BenchmarkTools using CUDA using HierarchicalEOM using LinearSolve # to change the solver for better GPU performance -using Plots -```` - -### Check version info. of `HierarchicalEOM.jl` - -````@example CUDA_Ext_example -HierarchicalEOM.versioninfo() -```` - -### Check version info. of `CUDA.jl` - -````@example CUDA_Ext_example -CUDA.versioninfo() ```` ### Setup Here, we demonstrate this extension by using the example of [the single-impurity Anderson model](@ref exp-SIAM). -````@example CUDA_Ext_example +```julia ϵ = -5 U = 10 Γ = 2 @@ -78,31 +64,31 @@ M_odd_gpu = cu(M_odd_cpu) # solve steady state with CPU ados_ss = SteadyState(M_even_cpu); -```` +``` !!! note "Note" This extension does not support for solving [`SteadyState`](@ref doc-Stationary-State) on GPU since it is not efficient and might get wrong solutions. If you really want to obtain the stationary state with GPU, you can repeatedly solve the [`evolution`](@ref doc-Time-Evolution) until you find it. ### Solving time evolution with CPU -````@example CUDA_Ext_example -@benchmark ados_list_cpu = evolution(M_even_cpu, ρ0, tlist; verbose=false) -```` +```julia +ados_list_cpu = evolution(M_even_cpu, ρ0, tlist; verbose=false) +``` ### Solving time evolution with GPU -````@example CUDA_Ext_example -@benchmark ados_list_gpu = evolution(M_even_gpu, ρ0, tlist; verbose=false) -```` +```julia +ados_list_gpu = evolution(M_even_gpu, ρ0, tlist; verbose=false) +``` ### Solving Spectrum with CPU -````@example CUDA_Ext_example -@benchmark dos_cpu = spectrum(M_odd_cpu, ados_ss, d_up, ωlist; verbose=false) -```` +```julia +dos_cpu = spectrum(M_odd_cpu, ados_ss, d_up, ωlist; verbose=false) +``` ### Solving Spectrum with GPU -````@example CUDA_Ext_example -@benchmark dos_gpu = spectrum(M_odd_gpu, ados_ss, d_up, ωlist; solver=KrylovJL_BICGSTAB(rtol=1f-10, atol=1f-12), verbose=false) -```` \ No newline at end of file +```julia +dos_gpu = spectrum(M_odd_gpu, ados_ss, d_up, ωlist; solver=KrylovJL_BICGSTAB(rtol=1f-10, atol=1f-12), verbose=false) +``` \ No newline at end of file From a60f366b2917f633b7862a4e9d5c3164895f1c83 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 21:46:53 +0900 Subject: [PATCH 12/14] fix CI --- .buildkite/pipeline.yml | 19 ------------------- docs/Project.toml | 2 -- 2 files changed, 21 deletions(-) delete mode 100644 .buildkite/pipeline.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml deleted file mode 100644 index 807314b5..00000000 --- a/.buildkite/pipeline.yml +++ /dev/null @@ -1,19 +0,0 @@ -steps: - - label: "HierarchicalEOM_CUDAExt" - plugins: - - JuliaCI/julia#v1: - version: "1" - - JuliaCI/julia-test#v1: - test_args: "--quickfail" - - JuliaCI/julia-coverage#v1: - dirs: - - src - agents: - queue: "juliagpu" - cuda: "*" - env: - GROUP: "HierarchicalEOM_CUDAExt" - SECRET_CODECOV_TOKEN: "kaIXEN51HinaQ4JGclQcIgxeMMtXDb5uvnP3E2eKrH4Eruf2pKd5QwUGcIVL8+rcWeo5FWj883rNxRQEH3YeCWs6/i7vzs+ORvG51QeCNYQgNqFzPsWRcq5qJYc+JPFbisS7q9nghqWTwr52cnjarD4Xx3ceGorMyS5NvFpCNxMgqHNyGkLvipxcTTJfKZK61bpnbntoIjiIO1XSZKjcxnXFGFnolV9BHCr5v8f7F42n2tUH7X3nDHmTBr1AbO2lFAU9ra/KezHcIf0wg2HcV8LZD0+mj8q/SBPjQZSH7cxwx4Q2eTjT4Sw7xnrBGuySVm8ZPCAV7nRNEHo+VqR+GQ==" - timeout_in_minutes: 30 - # Don't run Buildkite if the commit message includes the text [skip tests] - if: build.message !~ /\[skip tests\]/ \ No newline at end of file diff --git a/docs/Project.toml b/docs/Project.toml index 870a3232..edb712e6 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,5 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" HierarchicalEOM = "a62dbcb7-80f5-4d31-9a88-8b19fd92b128" LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" @@ -13,7 +12,6 @@ QuantumOptics = "6e0679c1-51ea-5a7c-ac74-d61b76210b0c" [compat] BenchmarkTools = "1.3" -CUDA = "5" Documenter = "0.27, 1" HierarchicalEOM = "1" LaTeXStrings = "1" From 9ee70211b83658f886ed998ea955c848d8765321 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Tue, 31 Oct 2023 21:56:41 +0900 Subject: [PATCH 13/14] add buildkite CI for CUDA extention tests --- .buildkite/pipeline.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .buildkite/pipeline.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 00000000..4a68ca76 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,18 @@ +steps: + - label: "HierarchicalEOM_CUDAExt" + plugins: + - JuliaCI/julia#v1: + version: "1" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" + coverage: false # 1000x slowdown + agents: + queue: "juliagpu" + cuda: "*" + env: + GROUP: "HierarchicalEOM_CUDAExt" + JULIA_PKG_SERVER: "" # it often struggles with our large artifacts + # SECRET_CODECOV_TOKEN: "..." + timeout_in_minutes: 30 + # Don't run Buildkite if the commit message includes the text [skip tests] + if: build.message !~ /\[skip tests\]/ \ No newline at end of file From 94826f75343a2dfe8f85df40b17d0321732f3f43 Mon Sep 17 00:00:00 2001 From: Yi-Te Huang Date: Wed, 1 Nov 2023 08:26:50 +0900 Subject: [PATCH 14/14] fix docs --- docs/src/extensions/CUDA.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/src/extensions/CUDA.md b/docs/src/extensions/CUDA.md index 49ec42e6..d9e2cb8e 100644 --- a/docs/src/extensions/CUDA.md +++ b/docs/src/extensions/CUDA.md @@ -6,6 +6,7 @@ This is an extension to support GPU ([`CUDA.jl`](https://github.com/JuliaGPU/CUD The described feature requires `Julia 1.9+`. The functions [`evolution`](@ref doc-Time-Evolution) (only supports ODE method with time-independent system Hamiltonian) and [`spectrum`](@ref doc-Spectrum) will automatically choose to solve on CPU or GPU depend on the type of the sparse matrix in `M::AbstractHEOMLSMatrix` objects (i.e., the type of the field `M.data`). + ```julia typeof(M.data) <: SparseMatrixCSC # solve on CPU typeof(M.data) <: CuSparseMatrixCSC # solve on GPU @@ -19,11 +20,11 @@ Therefore, we wrapped several functions in `CUDA` and `CUDA.CUSPARSE` in order t The extension will be automatically loaded if user imports the package `CUDA.jl` : -````@example CUDA_Ext_example +```julia using CUDA using HierarchicalEOM using LinearSolve # to change the solver for better GPU performance -```` +``` ### Setup