diff --git a/docs/src/api.md b/docs/src/api.md index 419149b40..73c97bc80 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -11,8 +11,36 @@ Krylov.AdjointStats ## Solver Types ```@docs -Krylov.KrylovSolver -Krylov.MinresSolver +KrylovSolver +MinresSolver +CgSolver +CrSolver +SymmlqSolver +CgLanczosSolver +CgLanczosShiftSolver +MinresQlpSolver +DqgmresSolver +DiomSolver +UsymlqSolver +UsymqrSolver +TricgSolver +TrimrSolver +TrilqrSolver +CgsSolver +BicgstabSolver +BilqSolver +QmrSolver +BilqrSolver +CglsSolver +CrlsSolver +CgneSolver +CrmrSolver +LslqSolver +LsqrSolver +LsmrSolver +LnlqSolver +CraigSolver +CraigmrSolver ``` ## Utilities @@ -22,6 +50,7 @@ Krylov.roots_quadratic Krylov.sym_givens Krylov.to_boundary Krylov.vec2str +Krylov.ktypeof Krylov.kzeros Krylov.kones ``` diff --git a/src/bicgstab.jl b/src/bicgstab.jl index 715a2c79b..896664c3f 100644 --- a/src/bicgstab.jl +++ b/src/bicgstab.jl @@ -13,7 +13,7 @@ # Alexis Montoison, # Montréal, October 2020. -export bicgstab +export bicgstab, bicgstab! """ (x, stats) = bicgstab(A, b::AbstractVector{T}; c::AbstractVector{T}=b, @@ -41,9 +41,14 @@ This implementation allows a left preconditioner `M` and a right preconditioner * H. A. van der Vorst, *Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems*, SIAM Journal on Scientific and Statistical Computing, 13(2), pp. 631--644, 1992. * G. L.G. Sleijpen and D. R. Fokkema, *BiCGstab(ℓ) for linear equations involving unsymmetric matrices with complex spectrum*, Electronic Transactions on Numerical Analysis, 1, pp. 11--32, 1993. """ -function bicgstab(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, - M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function bicgstab(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = BicgstabSolver(A, b) + bicgstab!(solver, A, b; kwargs...) +end + +function bicgstab!(solver :: BicgstabSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, + M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n, m = size(A) m == n || error("System must be square") @@ -56,18 +61,19 @@ function bicgstab(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") - # Determine the storage type of b - S = typeof(b) - # Set up workspace. - x = kzeros(S, n) # x₀ - s = kzeros(S, n) # s₀ - v = kzeros(S, n) # v₀ - r = copy(M * b) # r₀ - p = copy(r) # p₁ + x, s, v, r, p = solver.x, solver.s, solver.v, solver.r, solver.p + + x .= zero(T) # x₀ + s .= zero(T) # s₀ + v .= zero(T) # v₀ + r .= (M * b) # r₀ + p .= r # p₁ α = one(T) # α₀ ω = one(T) # ω₀ diff --git a/src/bilq.jl b/src/bilq.jl index 38ac9fc05..147a5757b 100644 --- a/src/bilq.jl +++ b/src/bilq.jl @@ -10,7 +10,7 @@ # Alexis Montoison, # Montreal, February 2019. -export bilq +export bilq, bilq! """ (x, stats) = bilq(A, b::AbstractVector{T}; c::AbstractVector{T}=b, @@ -29,9 +29,14 @@ when it exists. The transfer is based on the residual norm. * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. """ -function bilq(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function bilq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = BilqSolver(A, b) + bilq!(solver, A, b; kwargs...) +end + +function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, + atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n, m = size(A) m == n || error("System must be square") @@ -40,15 +45,17 @@ function bilq(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.d̅ # Initial solution x₀ and residual norm ‖r₀‖. - x = kzeros(S, n) + x .= zero(T) bNorm = @knrm2(n, b) # ‖r₀‖ bNorm == 0 && return (x, SimpleStats(true, false, [bNorm], T[], "x = 0 is a zero-residual solution")) @@ -64,16 +71,15 @@ function bilq(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, bᵗc = @kdot(n, b, c) # ⟨b,c⟩ bᵗc == 0 && return (x, SimpleStats(false, false, [bNorm], T[], "Breakdown bᵀc = 0")) - # Set up workspace. βₖ = √(abs(bᵗc)) # β₁γ₁ = bᵀc γₖ = bᵗc / βₖ # β₁γ₁ = bᵀc - vₖ₋₁ = kzeros(S, n) # v₀ = 0 - uₖ₋₁ = kzeros(S, n) # u₀ = 0 - vₖ = b / βₖ # v₁ = b / β₁ - uₖ = c / γₖ # u₁ = c / γ₁ + vₖ₋₁ .= zero(T) # v₀ = 0 + uₖ₋₁ .= zero(T) # u₀ = 0 + vₖ .= b ./ βₖ # v₁ = b / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(T) # Givens sines used for the LQ factorization of Tₖ - d̅ = kzeros(S, n) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ + d̅ .= zero(T) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ ζₖ₋₁ = ζbarₖ = zero(T) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(T) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(T) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations diff --git a/src/bilqr.jl b/src/bilqr.jl index c44675ac2..8da8ee095 100644 --- a/src/bilqr.jl +++ b/src/bilqr.jl @@ -10,7 +10,7 @@ # Alexis Montoison, # Montreal, July 2019. -export bilqr +export bilqr, bilqr! """ (x, t, stats) = bilqr(A, b::AbstractVector{T}, c::AbstractVector{T}; @@ -32,9 +32,14 @@ BiCG point, when it exists. The transfer is based on the residual norm. * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. """ -function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = BilqrSolver(A, b) + bilqr!(solver, A, b, c; kwargs...) +end + +function bilqr!(solver :: BilqrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T}; + atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n, m = size(A) m == n || error("Systems must be square") @@ -44,19 +49,21 @@ function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.t, solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂ # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖. - x = kzeros(S, n) # x₀ + x .= zero(T) # x₀ bNorm = @knrm2(n, b) # rNorm = ‖r₀‖ # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀt₀‖. - t = kzeros(S, n) # t₀ + t .= zero(T) # t₀ cNorm = @knrm2(n, c) # sNorm = ‖s₀‖ iter = 0 @@ -76,21 +83,21 @@ function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Set up workspace. βₖ = √(abs(bᵗc)) # β₁γ₁ = bᵀc γₖ = bᵗc / βₖ # β₁γ₁ = bᵀc - vₖ₋₁ = kzeros(S, n) # v₀ = 0 - uₖ₋₁ = kzeros(S, n) # u₀ = 0 - vₖ = b / βₖ # v₁ = b / β₁ - uₖ = c / γₖ # u₁ = c / γ₁ + vₖ₋₁ .= zero(T) # v₀ = 0 + uₖ₋₁ .= zero(T) # u₀ = 0 + vₖ .= b ./ βₖ # v₁ = b / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(T) # Givens sines used for the LQ factorization of Tₖ - d̅ = kzeros(S, n) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ + d̅ .= zero(T) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ ζₖ₋₁ = ζbarₖ = zero(T) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(T) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(T) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations ψbarₖ₋₁ = ψₖ₋₁ = zero(T) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates ϵₖ₋₃ = λₖ₋₂ = zero(T) # Components of Lₖ₋₁ - wₖ₋₃ = kzeros(S, n) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ - wₖ₋₂ = kzeros(S, n) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ + wₖ₋₃ .= zero(T) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ + wₖ₋₂ .= zero(T) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ τₖ = zero(T) # τₖ is used for the dual residual norm estimate # Stopping criterion. diff --git a/src/cg.jl b/src/cg.jl index e839f2e41..723ee168b 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -13,7 +13,7 @@ # Dominique Orban, # Salt Lake City, UT, March 2015. -export cg +export cg, cg! """ @@ -37,10 +37,15 @@ with `n = length(b)`. * M. R. Hestenes and E. Stiefel, *Methods of conjugate gradients for solving linear systems*, Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952. """ -function cg(A, b :: AbstractVector{T}; - M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false, - verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function cg(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CgSolver(A, b) + cg!(solver, A, b; kwargs...) +end + +function cg!(solver :: CgSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false, + verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0") @@ -50,16 +55,16 @@ function cg(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, r, p = solver.x, solver.r, solver.p - # Initial state. - x = kzeros(S, n) - r = copy(b) + x .= zero(T) + r .= b z = M * r - p = copy(z) + p .= z γ = @kdot(n, r, z) γ == 0 && return x, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution") diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl index 1f44a04b7..07e632c64 100644 --- a/src/cg_lanczos.jl +++ b/src/cg_lanczos.jl @@ -11,7 +11,7 @@ # Dominique Orban, # Princeton, NJ, March 2015. -export cg_lanczos, cg_lanczos_shift_seq +export cg_lanczos, cg_lanczos!, cg_lanczos_shift_seq, cg_lanczos_shift_seq! """ @@ -34,37 +34,43 @@ assumed to be symmetric and positive definite. * A. Frommer and P. Maass, *Fast CG-Based Methods for Tikhonov-Phillips Regularization*, SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999. * C. C. Paige and M. A. Saunders, *Solution of Sparse Indefinite Systems of Linear Equations*, SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975. """ -function cg_lanczos(A, b :: AbstractVector{T}; - M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function cg_lanczos(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CgLanczosSolver(A, b) + cg_lanczos!(solver, A, b; kwargs...) +end + +function cg_lanczos!(solver :: CgLanczosSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n = size(b, 1) (size(A, 1) == n & size(A, 2) == n) || error("Inconsistent problem size") (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n) - # Determine the storage type of b - S = typeof(b) - # Tests M == Iₙ MisI = isa(M, opEye) # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") + # Set up workspace. + x, Mv, Mv_prev, p = solver.x, solver.Mv, solver.Mv_prev, solver.p + # Initial state. - x = kzeros(S, n) # x₀ - Mv = copy(b) # Mv₁ ← b + x .= zero(T) # x₀ + Mv .= b # Mv₁ ← b v = M * Mv # v₁ = M⁻¹ * Mv₁ β = sqrt(@kdot(n, v, Mv)) # β₁ = v₁ᵀ M v₁ β == 0 && return x, LanczosStats(true, [zero(T)], false, zero(T), zero(T), "x = 0 is a zero-residual solution") - p = copy(v) + p .= v # Initialize Lanczos process. # β₁Mv₁ = b @kscal!(n, one(T)/β, v) # v₁ ← v₁ / β₁ MisI || @kscal!(n, one(T)/β, Mv) # Mv₁ ← Mv₁ / β₁ - Mv_prev = copy(Mv) + Mv_prev .= Mv iter = 0 itmax == 0 && (itmax = 2 * n) @@ -150,9 +156,14 @@ The method does _not_ abort if A + αI is not definite. A preconditioner M may be provided in the form of a linear operator and is assumed to be symmetric and positive definite. """ -function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVector{T}; - M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CgLanczosShiftSolver(A, b, shifts) + cg_lanczos_shift_seq!(solver, A, b, shifts; kwargs...) +end + +function cg_lanczos_shift_seq!(solver :: CgLanczosShiftSolver{T,S}, A, b :: AbstractVector{T}, shifts :: AbstractVector{T}; + M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n = size(b, 1) (size(A, 1) == n & size(A, 2) == n) || error("Inconsistent problem size") @@ -160,52 +171,62 @@ function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVecto nshifts = size(shifts, 1) (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) - # Determine the storage type of b - S = typeof(b) - # Tests M == Iₙ MisI = isa(M, opEye) # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(shifts) == S || error("ktypeof(shifts) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") + # Set up workspace. + Mv, Mv_prev, x, p, σ, δhat, ω, γ = solver.Mv, solver.Mv_prev, solver.x, solver.p, solver.σ, solver.δhat, solver.ω, solver.γ + rNorms, indefinite, converged, not_cv = solver.rNorms, solver.indefinite, solver.converged, solver.not_cv + # Initial state. ## Distribute x similarly to shifts. - x = [kzeros(S, n) for i = 1 : nshifts] # x₀ - Mv = copy(b) # Mv₁ ← b + for i = 1 : nshifts + x[i] .= zero(T) # x₀ + end + Mv .= b # Mv₁ ← b v = M * Mv # v₁ = M⁻¹ * Mv₁ β = sqrt(@kdot(n, v, Mv)) # β₁ = v₁ᵀ M v₁ β == 0 && return x, LanczosStats(true, [zero(T)], false, zero(T), zero(T), "x = 0 is a zero-residual solution") # Initialize each p to v. - p = [copy(v) for i = 1 : nshifts] + for i = 1 : nshifts + p[i] .= v + end # Initialize Lanczos process. # β₁Mv₁ = b @kscal!(n, one(T)/β, v) # v₁ ← v₁ / β₁ MisI || @kscal!(n, one(T)/β, Mv) # Mv₁ ← Mv₁ / β₁ - Mv_prev = copy(Mv) + Mv_prev .= Mv # Initialize some constants used in recursions below. ρ = one(T) - σ = β * ones(T, nshifts) - δhat = zeros(T, nshifts) - ω = zeros(T, nshifts) - γ = ones(T, nshifts) + σ .= β + δhat .= zero(T) + ω .= zero(T) + γ .= one(T) # Define stopping tolerance. - rNorms = β * ones(T, nshifts) + rNorms .= β rNorms_history = history ? [rNorms;] : T[] ε = atol + rtol * β # Keep track of shifted systems that have converged. - converged = rNorms .≤ ε + for i = 1 : nshifts + converged[i] = rNorms[i] ≤ ε + not_cv[i] = !converged[i] + end iter = 0 itmax == 0 && (itmax = 2 * n) # Keep track of shifted systems with negative curvature if required. - indefinite = falses(nshifts) + indefinite .= false # Build format strings for printing. if display(iter, verbose) @@ -215,7 +236,7 @@ function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVecto local_printf(iter, rNorms...) end - solved = all(converged) + solved = sum(not_cv) == 0 tired = iter ≥ itmax status = "unknown" @@ -241,34 +262,39 @@ function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVecto MisI || (ρ = @kdot(n, v, v)) for i = 1 : nshifts δhat[i] = δ + ρ * shifts[i] - γ[i] = 1 ./ (δhat[i] - ω[i] ./ γ[i]) + γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) + end + for i = 1 : nshifts + indefinite[i] |= γ[i] ≤ 0 end - indefinite .|= (γ .≤ 0) # Compute next CG iterate for each shifted system that has not yet converged. # Stop iterating on indefinite problems if requested. - not_cv = check_curvature ? findall(.! (converged .| indefinite)) : findall(.! converged) - - for i in not_cv - @kaxpy!(n, γ[i], p[i], x[i]) - ω[i] = β * γ[i] - σ[i] *= -ω[i] - ω[i] *= ω[i] - @kaxpby!(n, σ[i], v, ω[i], p[i]) - - # Update list of systems that have converged. - rNorms[i] = abs(σ[i]) - converged[i] = rNorms[i] ≤ ε + for i = 1 : nshifts + not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] + if not_cv[i] + @kaxpy!(n, γ[i], p[i], x[i]) + ω[i] = β * γ[i] + σ[i] *= -ω[i] + ω[i] *= ω[i] + @kaxpby!(n, σ[i], v, ω[i], p[i]) + + # Update list of systems that have not converged. + rNorms[i] = abs(σ[i]) + converged[i] = rNorms[i] ≤ ε + end end length(not_cv) > 0 && history && append!(rNorms_history, rNorms) # Is there a better way than to update this array twice per iteration? - not_cv = check_curvature ? findall(.! (converged .| indefinite)) : findall(.! converged) + for i = 1 : nshifts + not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] + end iter = iter + 1 display(iter, verbose) && local_printf(iter, rNorms...) - solved = length(not_cv) == 0 + solved = sum(not_cv) == 0 tired = iter ≥ itmax end (verbose > 0) && @printf("\n") diff --git a/src/cgls.jl b/src/cgls.jl index d79339158..ea873570c 100644 --- a/src/cgls.jl +++ b/src/cgls.jl @@ -26,7 +26,7 @@ # Dominique Orban, # Princeton, NJ, March 2015. -export cgls +export cgls, cgls! """ @@ -54,9 +54,14 @@ but simpler to implement. * M. R. Hestenes and E. Stiefel. *Methods of conjugate gradients for solving linear systems*, Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952. * A. Björck, T. Elfving and Z. Strakos, *Stability of Conjugate Gradient and Lanczos Methods for Linear Least Squares Problems*, SIAM Journal on Matrix Analysis and Applications, 19(3), pp. 720--736, 1998. """ -function cgls(A, b :: AbstractVector{T}; - M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function cgls(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CglsSolver(A, b) + cgls!(solver, A, b; kwargs...) +end + +function cgls!(solver :: CglsSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), + radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -64,21 +69,22 @@ function cgls(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || eltype(M) == T || error("eltype(M) ≠ $T") - # Compute Aᵀ + # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, p, r = solver.x, solver.p, solver.r - x = kzeros(S, n) - r = copy(b) + x .= zero(T) + r .= b bNorm = @knrm2(m, r) # Marginally faster than norm(b) bNorm == 0 && return x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution") Mr = M * r s = Aᵀ * Mr - p = copy(s) + p .= s γ = @kdot(n, s, s) # Faster than γ = dot(s, s) iter = 0 itmax == 0 && (itmax = m + n) diff --git a/src/cgne.jl b/src/cgne.jl index 890850155..78651835e 100644 --- a/src/cgne.jl +++ b/src/cgne.jl @@ -26,7 +26,7 @@ # Dominique Orban, # Montréal, QC, April 2015. -export cgne +export cgne, cgne! """ @@ -63,9 +63,14 @@ A preconditioner M may be provided in the form of a linear operator. * J. E. Craig, *The N-step iteration procedures*, Journal of Mathematics and Physics, 34(1), pp. 64--73, 1955. * J. E. Craig, *Iterations Procedures for Simultaneous Equations*, Ph.D. Thesis, Department of Electrical Engineering, MIT, 1954. """ -function cgne(A, b :: AbstractVector{T}; - M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function cgne(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CgneSolver(A, b) + cgne!(solver, A, b; kwargs...) +end + +function cgne!(solver :: CgneSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -73,16 +78,17 @@ function cgne(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, p, r = solver.x, solver.p, solver.r - x = kzeros(S, n) - r = copy(b) + x .= zero(T) + r .= b z = M * r rNorm = @knrm2(m, r) # Marginally faster than norm(r) rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution") @@ -91,7 +97,7 @@ function cgne(A, b :: AbstractVector{T}; # The following vector copy takes care of the case where A is a LinearOperator # with preallocation, so as to avoid overwriting vectors used later. In other # case, this should only add minimum overhead. - p = copy(Aᵀ * z) + p .= Aᵀ * z # Use ‖p‖ to detect inconsistent system. # An inconsistent system will necessarily have AA' singular. diff --git a/src/cgs.jl b/src/cgs.jl index 1ef4ddc0c..e67d96f73 100644 --- a/src/cgs.jl +++ b/src/cgs.jl @@ -8,7 +8,7 @@ # Alexis Montoison, # Montreal, October 2018. -export cgs +export cgs, cgs! """ (x, stats) = cgs(A, b::AbstractVector{T}; c::AbstractVector{T}=b, @@ -38,9 +38,14 @@ This implementation allows a left preconditioner M and a right preconditioner N. * P. Sonneveld, *CGS, A Fast Lanczos-Type Solver for Nonsymmetric Linear systems*, SIAM Journal on Scientific and Statistical Computing, 10(1), pp. 36--52, 1989. """ -function cgs(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, - M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function cgs(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CgsSolver(A, b) + cgs!(solver, A, b; kwargs...) +end + +function cgs!(solver :: CgsSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, + M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) m == n || error("System must be square") @@ -49,15 +54,16 @@ function cgs(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") isa(N, opEye) || (eltype(N) == T) || error("eltype(N) ≠ $T") - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, r, u, p, q = solver.x, solver.r, solver.u, solver.p, solver.q - # Initial solution x₀ and residual r₀. - x = kzeros(S, n) # x₀ - r = copy(M * b) # r₀ + x .= zero(T) # x₀ + r .= M * b # r₀ # Compute residual norm ‖r₀‖₂. rNorm = @knrm2(n, r) @@ -75,10 +81,9 @@ function cgs(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") display(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - # Set up workspace. - u = copy(r) # u₀ - p = copy(r) # p₀ - q = kzeros(S, n) # q₋₁ + u .= r # u₀ + p .= r # p₀ + q .= zero(T) # q₋₁ # Stopping criterion. solved = rNorm ≤ ε diff --git a/src/cr.jl b/src/cr.jl index 4a18756ca..6d45353a4 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -9,7 +9,7 @@ # Marie-Ange Dahito, # Montreal, QC, June 2017 -export cr +export cr, cr! """ (x, stats) = cr(A, b::AbstractVector{T}; @@ -32,9 +32,14 @@ with `n = length(b)`. * M. R. Hestenes and E. Stiefel, *Methods of conjugate gradients for solving linear systems*, Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952. * M-A. Dahito and D. Orban, *The Conjugate Residual Method in Linesearch and Trust-Region Methods*, SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019. """ -function cr(A, b :: AbstractVector{T}; - M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0, - radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false) where T <: AbstractFloat +function cr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CrSolver(A, b) + cr!(solver, A, b; kwargs...) +end + +function cr!(solver :: CrSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0, + radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false) where {S, T <: AbstractFloat} linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") n = size(b, 1) # size of the problem @@ -43,20 +48,21 @@ function cr(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") - # Determine the storage type of b - S = typeof(b) + # Set up workspace  + x, r, p, q = solver.x, solver.r, solver.p, solver.q # Initial state. - x = kzeros(S, n) # initial estimation x = 0 + x .= zero(T) # initial estimation x = 0 xNorm = zero(T) - r = copy(M * b) # initial residual r = M * (b - Ax) = M * b + r .= M * b # initial residual r = M * (b - Ax) = M * b Ar = A * r ρ = @kdot(n, r, Ar) ρ == 0 && return (x, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution")) - p = copy(r) - q = copy(Ar) + p .= r + q .= Ar (verbose > 0) && (m = zero(T)) # quadratic model iter = 0 diff --git a/src/craig.jl b/src/craig.jl index 9893ffffc..0d1f2160b 100644 --- a/src/craig.jl +++ b/src/craig.jl @@ -30,7 +30,7 @@ # # This implementation is strongly inspired from Mike Saunders's. -export craig +export craig, craig! """ @@ -76,10 +76,15 @@ In this implementation, both the x and y-parts of the solution are returned. * C. C. Paige and M. A. Saunders, *LSQR: An Algorithm for Sparse Linear Equations and Sparse Least Squares*, ACM Transactions on Mathematical Software, 8(1), pp. 43--71, 1982. * M. A. Saunders, *Solutions of Sparse Rectangular Systems Using LSQR and CRAIG*, BIT Numerical Mathematics, 35(4), pp. 588--604, 1995. """ -function craig(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0, - verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false) where T <: AbstractFloat +function craig(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CraigSolver(A, b) + craig!(solver, A, b; kwargs...) +end + +function craig!(solver :: CraigSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), + btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0, + verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -91,22 +96,23 @@ function craig(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, Nv, y, w, Mu = solver.x, solver.Nv, solver.y, solver.w, solver.Mu - x = kzeros(S, n) - y = kzeros(S, m) + x .= zero(T) + y .= zero(T) # When solving a SQD system, set regularization parameter λ = 1. sqd && (λ = one(T)) - Mu = copy(b) + Mu .= b u = M * Mu β₁ = sqrt(@kdot(m, u, Mu)) β₁ == 0 && return x, y, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution") @@ -122,8 +128,8 @@ function craig(A, b :: AbstractVector{T}; @kscal!(m, one(T)/β₁, u) MisI || @kscal!(m, one(T)/β₁, Mu) - Nv = kzeros(S, n) - w = kzeros(S, m) # Used to update y. + Nv .= zero(T) + w .= zero(T) # Used to update y. λ > 0 && (w2 = kzeros(S, n)) diff --git a/src/craigmr.jl b/src/craigmr.jl index d469ce76b..7b266bb82 100644 --- a/src/craigmr.jl +++ b/src/craigmr.jl @@ -24,7 +24,7 @@ # Dominique Orban, # Montreal, QC, May 2015. -export craigmr +export craigmr, craigmr! """ @@ -69,9 +69,14 @@ returned. * D. Orban and M. Arioli. *Iterative Solution of Symmetric Quasi-Definite Linear Systems*, Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017. * D. Orban, *The Projected Golub-Kahan Process for Constrained, Linear Least-Squares Problems*. Cahier du GERAD G-2014-15, 2014. """ -function craigmr(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function craigmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CraigmrSolver(A, b) + craigmr!(solver, A, b; kwargs...) +end + +function craigmr!(solver :: CraigmrSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), λ :: T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -83,19 +88,20 @@ function craigmr(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, Nv, y, Mu, w, wbar = solver.x, solver.Nv, solver.y, solver.Mu, solver.w, solver.wbar # Compute y such that AAᵀy = b. Then recover x = Aᵀy. - x = kzeros(S, n) - y = kzeros(S, m) - Mu = copy(b) + x .= zero(T) + y .= zero(T) + Mu .= b u = M * Mu β = sqrt(@kdot(m, u, Mu)) β == 0 && return (x, y, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution")) @@ -106,7 +112,7 @@ function craigmr(A, b :: AbstractVector{T}; MisI || @kscal!(m, one(T)/β, Mu) # α₁Nv₁ = Aᵀu₁. Aᵀu = Aᵀ * u - Nv = copy(Aᵀu) + Nv .= Aᵀu v = N * Nv α = sqrt(@kdot(n, v, Nv)) Anorm² = α * α @@ -134,9 +140,9 @@ function craigmr(A, b :: AbstractVector{T}; ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - wbar = copy(u) + wbar .= u @kscal!(m, one(T)/α, wbar) - w = kzeros(S, m) + w .= zero(T) status = "unknown" solved = rNorm ≤ ɛ_c diff --git a/src/crls.jl b/src/crls.jl index b2276b6df..1eff95312 100644 --- a/src/crls.jl +++ b/src/crls.jl @@ -18,7 +18,7 @@ # Dominique Orban, # Princeton, NJ, March 2015. -export crls +export crls, crls! """ @@ -45,9 +45,14 @@ but simpler to implement. * D. C.-L. Fong, *Minimum-Residual Methods for Sparse, Least-Squares using Golubg-Kahan Bidiagonalization*, Ph.D. Thesis, Stanford University, 2011. """ -function crls(A, b :: AbstractVector{T}; - M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function crls(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CrlsSolver(A, b) + crls!(solver, A, b; kwargs...) +end + +function crls!(solver :: CrlsSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), + radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -55,26 +60,27 @@ function crls(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, p, Ar, r, Ap = solver.x, solver.p, solver.Ar, solver.r, solver.Ap - x = kzeros(S, n) - r = copy(b) + x .= zero(T) + r .= b bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. bNorm == 0 && return x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution") Mr = M * r - Ar = copy(Aᵀ * Mr) # - λ * x0 if x0 ≠ 0. + Ar .= Aᵀ * Mr # - λ * x0 if x0 ≠ 0. s = A * Ar Ms = M * s - p = copy(Ar) - Ap = copy(s) + p .= Ar + Ap .= s q = Aᵀ * Ms # Ap λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p γ = @kdot(m, s, Ms) # Faster than γ = dot(s, Ms) diff --git a/src/crmr.jl b/src/crmr.jl index 441bed28d..fc469de05 100644 --- a/src/crmr.jl +++ b/src/crmr.jl @@ -24,7 +24,7 @@ # Dominique Orban, # Montreal, QC, April 2015. -export crmr +export crmr, crmr! """ @@ -61,9 +61,14 @@ A preconditioner M may be provided in the form of a linear operator. * D. Orban and M. Arioli, *Iterative Solution of Symmetric Quasi-Definite Linear Systems*, Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017. * D. Orban, *The Projected Golub-Kahan Process for Constrained Linear Least-Squares Problems*. Cahier du GERAD G-2014-15, 2014. """ -function crmr(A, b :: AbstractVector{T}; - M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function crmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = CrmrSolver(A, b) + crmr!(solver, A, b; kwargs...) +end + +function crmr!(solver :: CrmrSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -71,23 +76,24 @@ function crmr(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, p, r = solver.x, solver.p, solver.r - x = kzeros(S, n) # initial estimation x = 0 - r = copy(M * b) # initial residual r = M * (b - Ax) = M * b + x .= zero(T) # initial estimation x = 0 + r .= M * b # initial residual r = M * (b - Ax) = M * b bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. bNorm == 0 && return x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution") rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. λ > 0 && (s = copy(r)) Aᵀr = Aᵀ * r # - λ * x0 if x0 ≠ 0. - p = copy(Aᵀr) - γ = @kdot(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr) + p .= Aᵀr + γ = @kdot(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr) λ > 0 && (γ += λ * rNorm * rNorm) iter = 0 itmax == 0 && (itmax = m + n) diff --git a/src/diom.jl b/src/diom.jl index 272c59f0f..ee3167706 100644 --- a/src/diom.jl +++ b/src/diom.jl @@ -8,7 +8,7 @@ # Alexis Montoison, # Montreal, September 2018. -export diom +export diom, diom! """ (x, stats) = diom(A, b::AbstractVector{T}; @@ -31,9 +31,14 @@ This implementation allows a left preconditioner M and a right preconditioner N. * Y. Saad, *Practical use of some krylov subspace methods for solving indefinite and nonsymmetric linear systems*, SIAM journal on scientific and statistical computing, 5(1), pp. 203--228, 1984. """ -function diom(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - memory :: Int=20, pivoting :: Bool=false, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function diom(A, b :: AbstractVector{T}; memory :: Int=20, kwargs...) where T <: AbstractFloat + solver = DiomSolver(A, b, memory=memory) + diom!(solver, A, b; kwargs...) +end + +function diom!(solver :: DiomSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + pivoting :: Bool=false, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) m == n || error("System must be square") @@ -42,16 +47,17 @@ function diom(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") isa(N, opEye) || (eltype(N) == T) || error("eltype(N) ≠ $T") - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, x_old, P, V, L, H, p = solver.x, solver.x_old, solver.P, solver.V, solver.L, solver.H, solver.p # Initial solution x₀ and residual r₀. - x = kzeros(S, n) # x₀ - x_old = copy(x) - r₀ = M * b # M⁻¹(b - Ax₀) + x .= zero(T) # x₀ + x_old .= x + r₀ = M * b # M⁻¹(b - Ax₀) # Compute β. rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution") @@ -64,16 +70,17 @@ function diom(A, b :: AbstractVector{T}; (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") display(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) - # Set up workspace. - mem = min(memory, itmax) # Memory. - V = [kzeros(S, n) for i = 1 : mem] # Preconditioned Krylov vectors, orthogonal basis for {b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}. - P = [kzeros(S, n) for i = 1 : mem] # Directions for x : Pₘ = Vₘ(Uₘ)⁻¹. - H = zeros(T, mem+2) # Last column of the band hessenberg matrix Hₘ = LₘUₘ. + mem = length(L) # Memory + for i = 1 : mem + V[i] .= zero(T) # Preconditioned Krylov vectors, orthogonal basis for {M⁻¹b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}. + P[i] .= zero(T) # Directions for x : Pₘ = Vₘ(Uₘ)⁻¹. + end + H .= zero(T) # Last column of the band hessenberg matrix Hₘ = LₘUₘ. # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located. # In addition of that, the last column of Uₘ is stored in H. - L = zeros(T, mem) # Last mem Pivots of Lₘ. - p = BitArray(undef, mem) # Last mem permutations. + L .= zero(T) # Last mem Pivots of Lₘ. + p .= false # Last mem permutations. # Initial ξ₁ and V₁. ξ = rNorm diff --git a/src/dqgmres.jl b/src/dqgmres.jl index 635f5d43e..c32de22ce 100644 --- a/src/dqgmres.jl +++ b/src/dqgmres.jl @@ -8,7 +8,7 @@ # Alexis Montoison, # Montreal, August 2018. -export dqgmres +export dqgmres, dqgmres! """ (x, stats) = dqgmres(A, b::AbstractVector{T}; @@ -29,9 +29,14 @@ This implementation allows a left preconditioner M and a right preconditioner N. * Y. Saad and K. Wu, *DQGMRES: a quasi minimal residual algorithm based on incomplete orthogonalization*, Numerical Linear Algebra with Applications, Vol. 3(4), pp. 329--343, 1996. """ -function dqgmres(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, memory :: Int=20, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function dqgmres(A, b :: AbstractVector{T}; memory :: Int=20, kwargs...) where T <: AbstractFloat + solver = DqgmresSolver(A, b, memory=memory) + dqgmres!(solver, A, b; kwargs...) +end + +function dqgmres!(solver :: DqgmresSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) m == n || error("System must be square") @@ -40,15 +45,16 @@ function dqgmres(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") isa(N, opEye) || (eltype(N) == T) || error("eltype(N) ≠ $T") - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, P, V, c, s, H = solver.x, solver.P, solver.V, solver.c, solver.s, solver.H # Initial solution x₀ and residual r₀. - x = kzeros(S, n) # x₀ - r₀ = M * b # M⁻¹(b - Ax₀) + x .= zero(T) # x₀ + r₀ = M * b # M⁻¹(b - Ax₀) # Compute β rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution") @@ -62,12 +68,14 @@ function dqgmres(A, b :: AbstractVector{T}; display(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) # Set up workspace. - mem = min(memory, itmax) # Memory. - V = [kzeros(S, n) for i = 1 : mem] # Preconditioned Krylov vectors, orthogonal basis for {b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}. - P = [kzeros(S, n) for i = 1 : mem] # Directions for x : Pₘ = Vₘ(Rₘ)⁻¹. - s = zeros(T, mem) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ. - c = zeros(T, mem) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ. - H = zeros(T, mem+2) # Last column of the band hessenberg matrix Hₘ. + mem = length(c) # Memory. + for i = 1 : mem + V[i] .= zero(T) # Preconditioned Krylov vectors, orthogonal basis for {M⁻¹b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}. + P[i] .= zero(T) # Directions for x : Pₘ = Vₘ(Rₘ)⁻¹. + end + s .= zero(T) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ. + c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ. + H .= zero(T) # Last column of the band hessenberg matrix Hₘ. # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located. # In addition of that, the last column of Rₘ is also stored in H. diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl index 54a8b7894..a7af49cfb 100644 --- a/src/krylov_solvers.jl +++ b/src/krylov_solvers.jl @@ -1,18 +1,22 @@ -export KrylovSolver, MinresSolver +export KrylovSolver, MinresSolver, CgSolver, CrSolver, SymmlqSolver, CgLanczosSolver, +CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver, +UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver, +BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver, +LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver "Abstract type for using Krylov solvers in-place" -abstract type KrylovSolver{T, S} end +abstract type KrylovSolver{T,S} end """ Type for storing the vectors required by the in-place version of MINRES. -The outer constructor +The outer constructor - solver = MinresSolver(A, b :: AbstractVector{T}; window :: Int=5) where T <: AbstractFloat + solver = MinresSolver(A, b; window :: Int=5) may be used in order to create these vectors. """ -mutable struct MinresSolver{T, S} <: KrylovSolver{T, S} +mutable struct MinresSolver{T,S} <: KrylovSolver{T,S} x :: S r1 :: S r2 :: S @@ -20,16 +24,912 @@ mutable struct MinresSolver{T, S} <: KrylovSolver{T, S} w2 :: S err_vec :: Vector{T} stats :: SimpleStats{T} + + function MinresSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + r1 = S(undef, n) + r2 = S(undef, n) + w1 = S(undef, n) + w2 = S(undef, n) + err_vec = zeros(T, window) + stats = SimpleStats(false, true, T[], T[], "unknown") + solver = new{T,S}(x, r1, r2, w1, w2, err_vec, stats) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CG. + +The outer constructor + + solver = CgSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CgSolver{T,S} <: KrylovSolver{T,S} + x :: S + r :: S + p :: S + + function CgSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + solver = new{T,S}(x, r, p) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CR. + +The outer constructor + + solver = CrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CrSolver{T,S} <: KrylovSolver{T,S} + x :: S + r :: S + p :: S + q :: S + + function CrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + q = S(undef, n) + solver = new{T,S}(x, r, p, q) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of SYMMLQ. + +The outer constructor + + solver = SymmlqSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct SymmlqSolver{T,S} <: KrylovSolver{T,S} + x :: S + Mvold :: S + Mv :: S + w̅ :: S + + function SymmlqSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + Mvold = S(undef, n) + Mv = S(undef, n) + w̅ = S(undef, n) + solver = new{T,S}(x, Mvold, Mv, w̅) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CG-LANCZOS. + +The outer constructor + + solver = CgLanczosSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CgLanczosSolver{T,S} <: KrylovSolver{T,S} + x :: S + Mv :: S + Mv_prev :: S + p :: S + + function CgLanczosSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + Mv = S(undef, n) + Mv_prev = S(undef, n) + p = S(undef, n) + solver = new{T,S}(x, Mv, Mv_prev, p) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIFT-SEQ. + +The outer constructor + + solver = CgLanczosShiftSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CgLanczosShiftSolver{T,S} <: KrylovSolver{T,S} + Mv :: S + Mv_prev :: S + x :: Vector{S} + p :: Vector{S} + σ :: Vector{T} + δhat :: Vector{T} + ω :: Vector{T} + γ :: Vector{T} + rNorms :: Vector{T} + indefinite :: BitArray + converged :: BitArray + not_cv :: BitArray + + function CgLanczosShiftSolver(A, b, shifts) + n, m = size(A) + nshifts = length(shifts) + S = ktypeof(b) + T = eltype(b) + Mv = S(undef, n) + Mv_prev = S(undef, n) + x = [S(undef, n) for i = 1 : nshifts] + p = [S(undef, n) for i = 1 : nshifts] + σ = Vector{T}(undef, nshifts) + δhat = Vector{T}(undef, nshifts) + ω = Vector{T}(undef, nshifts) + γ = Vector{T}(undef, nshifts) + rNorms = Vector{T}(undef, nshifts) + indefinite = BitArray(undef, nshifts) + converged = BitArray(undef, nshifts) + not_cv = BitArray(undef, nshifts) + solver = new{T,S}(Mv, Mv_prev, x, p, σ, δhat, ω, γ, rNorms, indefinite, converged, not_cv) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of MINRES-QLP. + +The outer constructor + + solver = MinresQlpSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct MinresQlpSolver{T,S} <: KrylovSolver{T,S} + wₖ₋₁ :: S + wₖ :: S + M⁻¹vₖ₋₁ :: S + M⁻¹vₖ :: S + x :: S + + function MinresQlpSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + wₖ₋₁ = S(undef, n) + wₖ = S(undef, n) + M⁻¹vₖ₋₁ = S(undef, n) + M⁻¹vₖ = S(undef, n) + x = S(undef, n) + solver = new{T,S}(wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x) + return solver + end end -function MinresSolver(A, b :: AbstractVector{T}; window :: Int=5) where T <: AbstractFloat - m, n = size(A) - x = similar(b, n) - r1 = similar(b, n) - r2 = similar(b, n) - w1 = similar(b, n) - w2 = similar(b, n) - err_vec = zeros(T, window) - stats = SimpleStats(false, true, T[], T[], "unknown") - return MinresSolver(x, r1, r2, w1, w2, err_vec, stats) +""" +Type for storing the vectors required by the in-place version of DQGMRES. + +The outer constructor + + solver = DqgmresSolver(A, b; memory :: Integer=20) + +may be used in order to create these vectors. +""" +mutable struct DqgmresSolver{T,S} <: KrylovSolver{T,S} + x :: S + P :: Vector{S} + V :: Vector{S} + c :: Vector{T} + s :: Vector{T} + H :: Vector{T} + + function DqgmresSolver(A, b; memory :: Integer=20) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + P = [S(undef, n) for i = 1 : memory] + V = [S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{T}(undef, memory) + H = Vector{T}(undef, memory+2) + solver = new{T,S}(x, P, V, c, s, H) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of DIOM. + +The outer constructor + + solver = DiomSolver(A, b; memory :: Integer=20) + +may be used in order to create these vectors. +""" +mutable struct DiomSolver{T,S} <: KrylovSolver{T,S} + x :: S + x_old :: S + P :: Vector{S} + V :: Vector{S} + L :: Vector{T} + H :: Vector{T} + p :: BitArray + + function DiomSolver(A, b; memory :: Integer=20) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + x_old = S(undef, n) + P = [S(undef, n) for i = 1 : memory] + V = [S(undef, n) for i = 1 : memory] + L = Vector{T}(undef, memory) + H = Vector{T}(undef, memory+2) + p = BitArray(undef, memory) + solver = new{T,S}(x, x_old, P, V, L, H, p) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of USYMLQ. + +The outer constructor + + solver = UsymlqSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct UsymlqSolver{T,S} <: KrylovSolver{T,S} + uₖ₋₁ :: S + uₖ :: S + x :: S + d̅ :: S + vₖ₋₁ :: S + vₖ :: S + + function UsymlqSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + uₖ₋₁ = S(undef, m) + uₖ = S(undef, m) + x = S(undef, m) + d̅ = S(undef, m) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + solver = new{T,S}(uₖ₋₁, uₖ, x, d̅, vₖ₋₁, vₖ) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of USYMQR. + +The outer constructor + + solver = UsymqrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct UsymqrSolver{T,S} <: KrylovSolver{T,S} + vₖ₋₁ :: S + vₖ :: S + x :: S + wₖ₋₂ :: S + wₖ₋₁ :: S + uₖ₋₁ :: S + uₖ :: S + + function UsymqrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + x = S(undef, m) + wₖ₋₂ = S(undef, m) + wₖ₋₁ = S(undef, m) + uₖ₋₁ = S(undef, m) + uₖ = S(undef, m) + solver = new{T,S}(vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of TRICG. + +The outer constructor + + solver = TricgSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct TricgSolver{T,S} <: KrylovSolver{T,S} + yₖ :: S + N⁻¹uₖ₋₁ :: S + N⁻¹uₖ :: S + gy₂ₖ₋₁ :: S + gy₂ₖ :: S + xₖ :: S + M⁻¹vₖ₋₁ :: S + M⁻¹vₖ :: S + gx₂ₖ₋₁ :: S + gx₂ₖ :: S + + function TricgSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + yₖ = S(undef, m) + N⁻¹uₖ₋₁ = S(undef, m) + N⁻¹uₖ = S(undef, m) + gy₂ₖ₋₁ = S(undef, m) + gy₂ₖ = S(undef, m) + xₖ = S(undef, n) + M⁻¹vₖ₋₁ = S(undef, n) + M⁻¹vₖ = S(undef, n) + gx₂ₖ₋₁ = S(undef, n) + gx₂ₖ = S(undef, n) + solver = new{T,S}(yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, gy₂ₖ₋₁, gy₂ₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, gx₂ₖ₋₁, gx₂ₖ) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of TRIMR. + +The outer constructor + + solver = TrimrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct TrimrSolver{T,S} <: KrylovSolver{T,S} + yₖ :: S + N⁻¹uₖ₋₁ :: S + N⁻¹uₖ :: S + gy₂ₖ₋₃ :: S + gy₂ₖ₋₂ :: S + gy₂ₖ₋₁ :: S + gy₂ₖ :: S + xₖ :: S + M⁻¹vₖ₋₁ :: S + M⁻¹vₖ :: S + gx₂ₖ₋₃ :: S + gx₂ₖ₋₂ :: S + gx₂ₖ₋₁ :: S + gx₂ₖ :: S + + function TrimrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + yₖ = S(undef, m) + N⁻¹uₖ₋₁ = S(undef, m) + N⁻¹uₖ = S(undef, m) + gy₂ₖ₋₃ = S(undef, m) + gy₂ₖ₋₂ = S(undef, m) + gy₂ₖ₋₁ = S(undef, m) + gy₂ₖ = S(undef, m) + xₖ = S(undef, n) + M⁻¹vₖ₋₁ = S(undef, n) + M⁻¹vₖ = S(undef, n) + gx₂ₖ₋₃ = S(undef, n) + gx₂ₖ₋₂ = S(undef, n) + gx₂ₖ₋₁ = S(undef, n) + gx₂ₖ = S(undef, n) + solver = new{T,S}(yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of TRILQR. + +The outer constructor + + solver = TrilqrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct TrilqrSolver{T,S} <: KrylovSolver{T,S} + uₖ₋₁ :: S + uₖ :: S + vₖ₋₁ :: S + vₖ :: S + x :: S + t :: S + d̅ :: S + wₖ₋₃ :: S + wₖ₋₂ :: S + + function TrilqrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + uₖ₋₁ = S(undef, m) + uₖ = S(undef, m) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + x = S(undef, m) + t = S(undef, n) + d̅ = S(undef, m) + wₖ₋₃ = S(undef, n) + wₖ₋₂ = S(undef, n) + solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CGS. + +The outer constructor + + solver = CgsSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CgsSolver{T,S} <: KrylovSolver{T,S} + x :: S + r :: S + u :: S + p :: S + q :: S + + function CgsSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + r = S(undef, n) + u = S(undef, n) + p = S(undef, n) + q = S(undef, n) + solver = new{T,S}(x, r, u, p, q) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of BICGSTAB. + +The outer constructor + + solver = BicgstabSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct BicgstabSolver{T,S} <: KrylovSolver{T,S} + x :: S + r :: S + p :: S + v :: S + s :: S + + function BicgstabSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + v = S(undef, n) + s = S(undef, n) + solver = new{T,S}(x, r, p, v, s) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of BILQ. + +The outer constructor + + solver = BilqSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct BilqSolver{T,S} <: KrylovSolver{T,S} + uₖ₋₁ :: S + uₖ :: S + vₖ₋₁ :: S + vₖ :: S + x :: S + d̅ :: S + + function BilqSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + x = S(undef, n) + d̅ = S(undef, n) + solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of QMR. + +The outer constructor + + solver = QmrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct QmrSolver{T,S} <: KrylovSolver{T,S} + uₖ₋₁ :: S + uₖ :: S + vₖ₋₁ :: S + vₖ :: S + x :: S + wₖ₋₂ :: S + wₖ₋₁ :: S + + function QmrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of BILQR. + +The outer constructor + + solver = BilqrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct BilqrSolver{T,S} <: KrylovSolver{T,S} + uₖ₋₁ :: S + uₖ :: S + vₖ₋₁ :: S + vₖ :: S + x :: S + t :: S + d̅ :: S + wₖ₋₃ :: S + wₖ₋₂ :: S + + function BilqrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + x = S(undef, n) + t = S(undef, n) + d̅ = S(undef, n) + wₖ₋₃ = S(undef, n) + wₖ₋₂ = S(undef, n) + solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CGLS. + +The outer constructor + + solver = CglsSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CglsSolver{T,S} <: KrylovSolver{T,S} + x :: S + p :: S + r :: S + + function CglsSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + p = S(undef, m) + r = S(undef, n) + solver = new{T,S}(x, p, r) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CRLS. + +The outer constructor + + solver = CrlsSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CrlsSolver{T,S} <: KrylovSolver{T,S} + x :: S + p :: S + Ar :: S + r :: S + Ap :: S + + function CrlsSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + p = S(undef, m) + Ar = S(undef, m) + r = S(undef, n) + Ap = S(undef, n) + solver = new{T,S}(x, p, Ar, r, Ap) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CGNE. + +The outer constructor + + solver = CgneSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CgneSolver{T,S} <: KrylovSolver{T,S} + x :: S + p :: S + r :: S + + function CgneSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + p = S(undef, m) + r = S(undef, n) + solver = new{T,S}(x, p, r) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CRMR. + +The outer constructor + + solver = CrmrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CrmrSolver{T,S} <: KrylovSolver{T,S} + x :: S + p :: S + r :: S + + function CrmrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + p = S(undef, m) + r = S(undef, n) + solver = new{T,S}(x, p, r) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of LSLQ. + +The outer constructor + + solver = LslqSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct LslqSolver{T,S} <: KrylovSolver{T,S} + x_lq :: S + Nv :: S + w̄ :: S + Mu :: S + + function LslqSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x_lq = S(undef, m) + Nv = S(undef, m) + w̄ = S(undef, m) + Mu = S(undef, n) + solver = new{T,S}(x_lq, Nv, w̄, Mu) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of LSQR. + +The outer constructor + + solver = LsqrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct LsqrSolver{T,S} <: KrylovSolver{T,S} + x :: S + Nv :: S + w :: S + Mu :: S + + function LsqrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + Nv = S(undef, m) + w = S(undef, m) + Mu = S(undef, n) + solver = new{T,S}(x, Nv, w, Mu) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of LSMR. + +The outer constructor + + solver = LsmrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct LsmrSolver{T,S} <: KrylovSolver{T,S} + x :: S + Nv :: S + h :: S + hbar :: S + Mu :: S + + function LsmrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + Nv = S(undef, m) + h = S(undef, m) + hbar = S(undef, m) + Mu = S(undef, n) + solver = new{T,S}(x, Nv, h, hbar, Mu) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of LNLQ. + +The outer constructor + + solver = LnlqSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct LnlqSolver{T,S} <: KrylovSolver{T,S} + x :: S + Nv :: S + y :: S + w̄ :: S + Mu :: S + + function LnlqSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + Nv = S(undef, m) + y = S(undef, n) + w̄ = S(undef, n) + Mu = S(undef, n) + solver = new{T,S}(x, Nv, y, w̄, Mu) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CRAIG. + +The outer constructor + + solver = CraigSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CraigSolver{T,S} <: KrylovSolver{T,S} + x :: S + Nv :: S + y :: S + w :: S + Mu :: S + + function CraigSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + Nv = S(undef, m) + y = S(undef, n) + w = S(undef, n) + Mu = S(undef, n) + solver = new{T,S}(x, Nv, y, w, Mu) + return solver + end +end + +""" +Type for storing the vectors required by the in-place version of CRAIGMR. + +The outer constructor + + solver = CraigmrSolver(A, b) + +may be used in order to create these vectors. +""" +mutable struct CraigmrSolver{T,S} <: KrylovSolver{T,S} + x :: S + Nv :: S + y :: S + Mu :: S + w :: S + wbar :: S + + function CraigmrSolver(A, b) + n, m = size(A) + S = ktypeof(b) + T = eltype(b) + x = S(undef, m) + Nv = S(undef, m) + y = S(undef, n) + Mu = S(undef, n) + w = S(undef, n) + wbar = S(undef, n) + solver = new{T,S}(x, Nv, y, Mu, w, wbar) + return solver + end end diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl index ab11c9ee3..6d0f6d39e 100644 --- a/src/krylov_utils.jl +++ b/src/krylov_utils.jl @@ -166,18 +166,35 @@ function to_boundary(x :: Vector{T}, d :: Vector{T}, return roots # `σ1` and `σ2` end +""" + S = ktypeof(v) + +Return a dense storage type `S` based on the type of `v`. +""" +function ktypeof(v :: AbstractVector) + S = typeof(v) + if S <: SubArray + S = S.types[1] # SubArray + end + if S <: AbstractSparseVector + S = S <: SparseVector ? S.types[3] : S.types[2] # SparseVector / CuSparseVector + end + return S +end + """ v = kzeros(S, n) Create an AbstractVector of storage type `S` of length `n` only composed of zero. """ -@inline kzeros(S, n) = S <: SubArray ? fill!(S.types[1](undef, n), zero(eltype(S))) : fill!(S(undef, n), zero(eltype(S))) +@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) + """ v = kones(S, n) Create an AbstractVector of storage type `S` of length `n` only composed of one. """ -@inline kones(S, n) = S <: SubArray ? fill!(S.types[1](undef, n), one(eltype(S))) : fill!(similar(S, n), one(eltype(S))) +@inline kones(S, n) = fill!(S(undef, n), one(eltype(S))) @inline display(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) diff --git a/src/lnlq.jl b/src/lnlq.jl index ef1cfedd6..1fce5d8ce 100644 --- a/src/lnlq.jl +++ b/src/lnlq.jl @@ -22,7 +22,7 @@ # Alexis Montoison, # Montréal, March 2019 -- Alès, January 2020. -export lnlq +export lnlq, lnlq! """ (x, y, stats) = lnlq(A, b::AbstractVector{T}; @@ -64,10 +64,15 @@ In this implementation, both the x and y-parts of the solution are returned. * R. Estrin, D. Orban, M.A. Saunders, *LNLQ: An Iterative Method for Least-Norm Problems with an Error Minimization Property*, SIAM Journal on Matrix Analysis and Applications, 40(3), pp. 1102--1124, 2019. """ -function lnlq(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), - atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function lnlq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = LnlqSolver(A, b) + lnlq!(solver, A, b; kwargs...) +end + +function lnlq!(solver :: LnlqSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) length(b) == m || error("Inconsistent problem size") @@ -79,18 +84,19 @@ function lnlq(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, Nv, y, w̄, Mu = solver.x, solver.Nv, solver.y, solver.w̄, solver.Mu # Initial solutions (x₀, y₀) and residual norm ‖r₀‖. - x = kzeros(S, n) - y = kzeros(S, m) + x .= zero(T) + y .= zero(T) # When solving a SQD system, set regularization parameter λ = 1. sqd && (λ = one(T)) @@ -112,7 +118,7 @@ function lnlq(A, b :: AbstractVector{T}; # Initialize generalized Golub-Kahan bidiagonalization. # β₁Mu₁ = b. - Mu = copy(b) + Mu .= b u = M * Mu # u₁ = M⁻¹ * Mu₁ βₖ = sqrt(@kdot(m, u, Mu)) # β₁ = ‖u₁‖_M if βₖ ≠ 0 @@ -122,7 +128,7 @@ function lnlq(A, b :: AbstractVector{T}; # α₁Nv₁ = Aᵀu₁. Aᵀu = Aᵀ * u - Nv = copy(Aᵀu) + Nv .= Aᵀu v = N * Nv # v₁ = N⁻¹ * Nv₁ αₖ = sqrt(@kdot(n, v, Nv)) # α₁ = ‖v₁‖_N if αₖ ≠ 0 @@ -130,8 +136,7 @@ function lnlq(A, b :: AbstractVector{T}; NisI || @kscal!(n, 1 / αₖ, Nv) end - # Set up workspace. - w̄ = copy(u) # Direction w̄₁ + w̄ .= u # Direction w̄₁ cₖ = sₖ = zero(T) # Givens sines and cosines used for the LQ factorization of (Lₖ)ᵀ ζₖ₋₁ = zero(T) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ ηₖ = zero(T) # Coefficient of M̅ₖ diff --git a/src/lslq.jl b/src/lslq.jl index ac3eedb0d..f7f888920 100644 --- a/src/lslq.jl +++ b/src/lslq.jl @@ -1,7 +1,7 @@ # Dominique Orban, # Montreal, QC, November 2016-January 2017. -export lslq +export lslq, lslq! """ @@ -103,11 +103,16 @@ The iterations stop as soon as one of the following conditions holds true: * R. Estrin, D. Orban and M. A. Saunders, *Euclidean-norm error bounds for SYMMLQ and CG*, SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019. * R. Estrin, D. Orban and M. A. Saunders, *LSLQ: An Iterative Method for Linear Least-Squares with an Error Minimization Property*, SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 254--275, 2019. """ -function lslq(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), - atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T), - window :: Int=5, utol :: T=√eps(T), itmax :: Int=0, - σ :: T=zero(T), conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function lslq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = LslqSolver(A, b) + lslq!(solver, A, b; kwargs...) +end + +function lslq!(solver :: LslqSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), + atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T), + window :: Int=5, utol :: T=√eps(T), itmax :: Int=0, + σ :: T=zero(T), conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -119,28 +124,29 @@ function lslq(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x_lq, Nv, w̄, Mu = solver.x_lq, solver.Nv, solver.w̄, solver.Mu # If solving an SQD system, set regularization to 1. sqd && (λ = one(T)) λ² = λ * λ ctol = conlim > 0 ? 1/conlim : zero(T) - x_lq = kzeros(S, n) # LSLQ point + x_lq .= zero(T) # LSLQ point err_lbnds = T[] err_ubnds_lq = T[] err_ubnds_cg = T[] # Initialize Golub-Kahan process. # β₁ M u₁ = b. - Mu = copy(b) + Mu .= b u = M * Mu β₁ = sqrt(@kdot(m, u, Mu)) β₁ == 0 && return (x_lq, kzeros(S, n), err_lbnds, err_ubnds_lq, err_ubnds_cg, @@ -150,7 +156,7 @@ function lslq(A, b :: AbstractVector{T}; @kscal!(m, one(T)/β₁, u) MisI || @kscal!(m, one(T)/β₁, Mu) Aᵀu = Aᵀ * u - Nv = copy(Aᵀu) + Nv .= Aᵀu v = N * Nv α = sqrt(@kdot(n, v, Nv)) # = α₁ @@ -173,7 +179,7 @@ function lslq(A, b :: AbstractVector{T}; xcgNorm = zero(T) xcgNorm² = zero(T) - w̄ = copy(v) # w̄₁ = v₁ + w̄ .= v # w̄₁ = v₁ err_lbnd = zero(T) err_vec = zeros(T, window) @@ -301,7 +307,7 @@ function lslq(A, b :: AbstractVector{T}; if σ > 0 && iter > 0 err_ubnd_cg = sqrt(ζ̃ * ζ̃ - ζ̄ * ζ̄ ) - push!(err_ubnds_cg, err_ubnd_cg) + history && push!(err_ubnds_cg, err_ubnd_cg) fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²) end @@ -327,7 +333,7 @@ function lslq(A, b :: AbstractVector{T}; err_vec[mod(iter, window) + 1] = ζ if iter ≥ window err_lbnd = norm(err_vec) - push!(err_lbnds, err_lbnd) + history && push!(err_lbnds, err_lbnd) fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm end @@ -337,7 +343,7 @@ function lslq(A, b :: AbstractVector{T}; ϵ̃ = -ω * c τ̃ = -τ * δ / ω ζ̃ = (τ̃ - ζ * η̃) / ϵ̃ - push!(err_ubnds_lq, abs(ζ̃ )) + history && push!(err_ubnds_lq, abs(ζ̃ )) end # Stopping conditions that do not depend on user input. diff --git a/src/lsmr.jl b/src/lsmr.jl index f7f61b373..e918d8d28 100644 --- a/src/lsmr.jl +++ b/src/lsmr.jl @@ -22,7 +22,7 @@ # Dominique Orban, # Montreal, QC, May 2015. -export lsmr +export lsmr, lsmr! """ @@ -72,13 +72,18 @@ In this case, `N` can still be specified and indicates the weighted norm in whic * D. C.-L. Fong and M. A. Saunders, *LSMR: An Iterative Algorithm for Sparse*, Least Squares Problems, SIAM Journal on Scientific Computing, 33(5), pp. 2950--2971, 2011. """ -function lsmr(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), sqd :: Bool=false, - λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), window :: Int=5, - itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function lsmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = LsmrSolver(A, b) + lsmr!(solver, A, b; kwargs...) +end + +function lsmr!(solver :: LsmrSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), sqd :: Bool=false, + λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T), + atol :: T=zero(T), rtol :: T=zero(T), + etol :: T=√eps(T), window :: Int=5, + itmax :: Int=0, conlim :: T=1/√eps(T), + radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -87,29 +92,30 @@ function lsmr(A, b :: AbstractVector{T}; # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) - # Tests M == Iₙ and N == Iₘ MisI = isa(M, opEye) NisI = isa(N, opEye) # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' + # Set up workspace. + x, Nv, h, hbar, Mu = solver.x, solver.Nv, solver.h, solver.hbar, solver.Mu + # If solving an SQD system, set regularization to 1. sqd && (λ = one(T)) ctol = conlim > 0 ? 1/conlim : zero(T) - x = kzeros(S, n) + x .= zero(T) # Initialize Golub-Kahan process. # β₁ M u₁ = b. - Mu = copy(b) + Mu .= b u = M * Mu β₁ = sqrt(@kdot(m, u, Mu)) β₁ == 0 && return (x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution")) @@ -118,7 +124,7 @@ function lsmr(A, b :: AbstractVector{T}; @kscal!(m, one(T)/β₁, u) MisI || @kscal!(m, one(T)/β₁, Mu) Aᵀu = Aᵀ * u - Nv = copy(Aᵀu) + Nv .= Aᵀu v = N * Nv α = sqrt(@kdot(n, v, Nv)) @@ -165,8 +171,8 @@ function lsmr(A, b :: AbstractVector{T}; @kscal!(n, one(T)/α, v) NisI || @kscal!(n, one(T)/α, Nv) - h = copy(v) - hbar = kzeros(S, n) + h .= v + hbar .= zero(T) status = "unknown" on_boundary = false diff --git a/src/lsqr.jl b/src/lsqr.jl index 27f3a5709..3bc6f8aff 100644 --- a/src/lsqr.jl +++ b/src/lsqr.jl @@ -22,7 +22,7 @@ # Dominique Orban, # Montreal, QC, May 2015. -export lsqr +export lsqr, lsqr! """ @@ -72,13 +72,18 @@ In this case, `N` can still be specified and indicates the weighted norm in whic * C. C. Paige and M. A. Saunders, *LSQR: An Algorithm for Sparse Linear Equations and Sparse Least Squares*, ACM Transactions on Mathematical Software, 8(1), pp. 43--71, 1982. """ -function lsqr(A, b :: AbstractVector{T}; - M=opEye(), N=opEye(), sqd :: Bool=false, - λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), window :: Int=5, - itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function lsqr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = LsqrSolver(A, b) + lsqr!(solver, A, b; kwargs...) +end + +function lsqr!(solver :: LsqrSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), N=opEye(), sqd :: Bool=false, + λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T), + atol :: T=zero(T), rtol :: T=zero(T), + etol :: T=√eps(T), window :: Int=5, + itmax :: Int=0, conlim :: T=1/√eps(T), + radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) size(b, 1) == m || error("Inconsistent problem size") @@ -90,24 +95,25 @@ function lsqr(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + x, Nv, w, Mu = solver.x, solver.Nv, solver.w, solver.Mu # If solving an SQD system, set regularization to 1. sqd && (λ = one(T)) λ² = λ * λ ctol = conlim > 0 ? 1/conlim : zero(T) - x = kzeros(S, n) + x .= zeros(T) # Initialize Golub-Kahan process. # β₁ M u₁ = b. - Mu = copy(b) + Mu .= b u = M * Mu β₁ = sqrt(@kdot(m, u, Mu)) β₁ == 0 && return (x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution")) @@ -116,7 +122,7 @@ function lsqr(A, b :: AbstractVector{T}; @kscal!(m, one(T)/β₁, u) MisI || @kscal!(m, one(T)/β₁, Mu) Aᵀu = Aᵀ * u - Nv = copy(Aᵀu) + Nv .= Aᵀu v = N * Nv Anorm² = @kdot(n, v, Nv) Anorm = sqrt(Anorm²) @@ -143,7 +149,7 @@ function lsqr(A, b :: AbstractVector{T}; α == 0 && return (x, SimpleStats(true, false, [β₁], [zero(T)], "x = 0 is a minimum least-squares solution")) @kscal!(n, one(T)/α, v) NisI || @kscal!(n, one(T)/α, Nv) - w = copy(v) + w .= v # Initialize other constants. ϕbar = β₁ diff --git a/src/minres.jl b/src/minres.jl index 74f3ec7d9..d9ebad751 100644 --- a/src/minres.jl +++ b/src/minres.jl @@ -72,7 +72,7 @@ function minres(A, b :: AbstractVector{T}; window :: Int=5, kwargs...) where T < minres!(solver, A, b; kwargs...) end -function minres!(solver :: MinresSolver{T, S}, A, b :: AbstractVector{T}; +function minres!(solver :: MinresSolver{T,S}, A, b :: AbstractVector{T}; M=opEye(), λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100, ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), @@ -85,10 +85,12 @@ function minres!(solver :: MinresSolver{T, S}, A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T") - # get data from solver + # Set up workspace. x, r1, r2, w1, w2, err_vec, stats = solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.err_vec, solver.stats + window = length(err_vec) rNorms, ArNorms = stats.residuals, stats.Aresiduals !history && !isempty(rNorms) && (rNorms = T[]) diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl index 2a29dfa7a..9f35268a1 100644 --- a/src/minres_qlp.jl +++ b/src/minres_qlp.jl @@ -14,7 +14,7 @@ # Alexis Montoison, # Montreal, September 2019. -export minres_qlp +export minres_qlp, minres_qlp! """ (x, stats) = minres_qlp(A, b::AbstractVector{T}; @@ -35,9 +35,14 @@ M also indicates the weighted norm in which residuals are measured. * S.-C. T. Choi, C. C. Paige and M. A. Saunders, *MINRES-QLP: A Krylov subspace method for indefinite or singular symmetric systems*, SIAM Journal on Scientific Computing, Vol. 33(4), pp. 1810--1836, 2011. * S.-C. T. Choi and M. A. Saunders, *Algorithm 937: MINRES-QLP for symmetric and Hermitian linear equations and least-squares problems*, ACM Transactions on Mathematical Software, 40(2), pp. 1--12, 2014. """ -function minres_qlp(A, b :: AbstractVector{T}; - M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), λ ::T=zero(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function minres_qlp(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = MinresQlpSolver(A, b) + minres_qlp!(solver, A, b; kwargs...) +end + +function minres_qlp!(solver :: MinresQlpSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), λ ::T=zero(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n, m = size(A) m == n || error("System must be square") @@ -49,16 +54,17 @@ function minres_qlp(A, b :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.x # Initial solution x₀ - x = kzeros(S, n) + x .= zero(T) # β₁v₁ = Mb - M⁻¹vₖ = copy(b) + M⁻¹vₖ .= b vₖ = M * M⁻¹vₖ βₖ = sqrt(@kdot(n, vₖ, M⁻¹vₖ)) if βₖ ≠ 0 @@ -80,14 +86,14 @@ function minres_qlp(A, b :: AbstractVector{T}; display(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗") # Set up workspace. - M⁻¹vₖ₋₁ = kzeros(S, n) + M⁻¹vₖ₋₁ .= zero(T) ζbarₖ = βₖ ξₖ₋₁ = zero(T) τₖ₋₂ = τₖ₋₁ = τₖ = zero(T) ψbarₖ₋₂ = zero(T) μbisₖ₋₂ = μbarₖ₋₁ = zero(T) - wₖ₋₁ = kzeros(S, n) - wₖ = kzeros(S, n) + wₖ₋₁ .= zero(T) + wₖ .= zero(T) cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ diff --git a/src/qmr.jl b/src/qmr.jl index 7579591b9..7704d7a63 100644 --- a/src/qmr.jl +++ b/src/qmr.jl @@ -18,7 +18,7 @@ # Alexis Montoison, # Montreal, May 2019. -export qmr +export qmr, qmr! """ (x, stats) = qmr(A, b::AbstractVector{T}; c::AbstractVector{T}=b, @@ -36,9 +36,14 @@ When A is symmetric and b = c, QMR is equivalent to MINRES. * R. W. Freund and N. M. Nachtigal, *An implementation of the QMR method based on coupled two-term recurrences*, SIAM Journal on Scientific Computing, Vol. 15(2), pp. 313--337, 1994. * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. """ -function qmr(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function qmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = QmrSolver(A, b) + qmr!(solver, A, b; kwargs...) +end + +function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, + atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} n, m = size(A) m == n || error("System must be square") @@ -47,16 +52,18 @@ function qmr(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.wₖ₋₂, solver.wₖ₋₁ # Initial solution x₀ and residual norm ‖r₀‖. - x = kzeros(S, n) - rNorm = @knrm2(n, b) # rNorm = ‖r₀‖ + x .= zero(T) + rNorm = @knrm2(n, b) # ‖r₀‖ rNorm == 0 && return (x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution")) iter = 0 @@ -71,17 +78,16 @@ function qmr(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b, bᵗc = @kdot(n, b, c) # ⟨b,c⟩ bᵗc == 0 && return (x, SimpleStats(false, false, [rNorm], T[], "Breakdown bᵀc = 0")) - # Set up uorkspace. βₖ = √(abs(bᵗc)) # β₁γ₁ = bᵀc γₖ = bᵗc / βₖ # β₁γ₁ = bᵀc - vₖ₋₁ = kzeros(S, n) # v₀ = 0 - uₖ₋₁ = kzeros(S, n) # u₀ = 0 - vₖ = b / βₖ # v₁ = b / β₁ - uₖ = c / γₖ # u₁ = c / γ₁ + vₖ₋₁ .= zero(T) # v₀ = 0 + uₖ₋₁ .= zero(T) # u₀ = 0 + vₖ .= b ./ βₖ # v₁ = b / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - wₖ₋₂ = kzeros(S, n) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ - wₖ₋₁ = kzeros(S, n) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ + wₖ₋₂ .= zero(T) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ + wₖ₋₁ .= zero(T) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ τₖ = @kdot(n, vₖ, vₖ) # τₖ is used for the residual norm estimate diff --git a/src/symmlq.jl b/src/symmlq.jl index d32cd5203..32682c3d9 100644 --- a/src/symmlq.jl +++ b/src/symmlq.jl @@ -9,7 +9,7 @@ # # Ron Estrin, -export symmlq +export symmlq, symmlq! """ @@ -35,34 +35,40 @@ assumed to be symmetric and positive definite. * C. C. Paige and M. A. Saunders, *Solution of Sparse Indefinite Systems of Linear Equations*, SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975. """ -function symmlq(A, b :: AbstractVector{T}; - M=opEye(), λ :: T=zero(T), transfer_to_cg :: Bool=true, - λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - etol :: T=√eps(T), window :: Int=0, itmax :: Int=0, - conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function symmlq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = SymmlqSolver(A, b) + symmlq!(solver, A, b; kwargs...) +end + +function symmlq!(solver :: SymmlqSolver{T,S}, A, b :: AbstractVector{T}; + M=opEye(), λ :: T=zero(T), transfer_to_cg :: Bool=true, + λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), + etol :: T=√eps(T), window :: Int=0, itmax :: Int=0, + conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) m == n || error("System must be square") size(b, 1) == m || error("Inconsistent problem size") (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n) - # Determine the storage type of b - S = typeof(b) - # Test M == Iₘ MisI = isa(M, opEye) # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") + # Set up workspace. + x, Mvold, Mv, w̅ = solver.x, solver.Mvold, solver.Mv, solver.w̅ + ϵM = eps(T) - x = kzeros(S, n) + x .= zero(T) ctol = conlim > 0 ? 1 / conlim : zero(T) # Initialize Lanczos process. # β₁ M v₁ = b. - Mvold = copy(b) + Mvold .= b vold = M * Mvold β₁ = @kdot(m, vold, Mvold) β₁ == 0 && return (x, SimpleStats(true, true, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution")) @@ -71,9 +77,9 @@ function symmlq(A, b :: AbstractVector{T}; @kscal!(m, one(T) / β, vold) MisI || @kscal!(m, one(T) / β, Mvold) - w̅ = copy(vold) + w̅ .= vold - Mv = copy(A * vold) + Mv .= A * vold α = @kdot(m, vold, Mv) + λ @kaxpy!(m, -α, Mvold, Mv) # Mv = Mv - α * Mvold v = M * Mv diff --git a/src/tricg.jl b/src/tricg.jl index 136bf1509..aeaf1f75a 100644 --- a/src/tricg.jl +++ b/src/tricg.jl @@ -9,7 +9,7 @@ # Alexis Montoison, # Montréal, April 2020. -export tricg +export tricg, tricg! """ (x, y, stats) = tricg(A, b::AbstractVector{T}, c::AbstractVector{T}; @@ -34,7 +34,7 @@ If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite TriCG is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. - [ M O ] + [ M 0 ] [ 0 N ] indicates the weighted norm in which residuals are measured. @@ -50,10 +50,15 @@ Information will be displayed every `verbose` iterations. * A. Montoison and D. Orban, *TriCG and TriMR: Two Iterative Methods for Symmetric Quasi-Definite Systems*, Cahier du GERAD G-2020-41, GERAD, Montréal, 2020. """ -function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T}; - M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = TricgSolver(A, b) + tricg!(solver, A, b, c; kwargs...) +end + +function tricg!(solver :: TricgSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T}; + M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), + spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, + τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) length(b) == m || error("Inconsistent problem size") @@ -71,28 +76,31 @@ function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.yₖ, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.xₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ + gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ # Initial solutions x₀ and y₀. - xₖ = kzeros(S, m) - yₖ = kzeros(S, n) + xₖ .= zero(T) + yₖ .= zero(T) iter = 0 itmax == 0 && (itmax = m+n) # Initialize preconditioned orthogonal tridiagonalization process. - M⁻¹vₖ₋₁ = kzeros(S, m) # v₀ = 0 - N⁻¹uₖ₋₁ = kzeros(S, n) # u₀ = 0 + M⁻¹vₖ₋₁ .= zero(T) # v₀ = 0 + N⁻¹uₖ₋₁ .= zero(T) # u₀ = 0 # β₁Ev₁ = b ↔ β₁v₁ = Mb - M⁻¹vₖ = copy(b) + M⁻¹vₖ .= b vₖ = M * M⁻¹vₖ βₖ = sqrt(@kdot(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E if βₖ ≠ 0 @@ -101,7 +109,7 @@ function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T}; end # γ₁Fu₁ = c ↔ γ₁u₁ = Nb - N⁻¹uₖ = copy(c) + N⁻¹uₖ .= c uₖ = N * N⁻¹uₖ γₖ = sqrt(@kdot(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F if γₖ ≠ 0 @@ -110,10 +118,10 @@ function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T}; end # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ - gx₂ₖ₋₁ = kzeros(S, m) - gy₂ₖ₋₁ = kzeros(S, n) - gx₂ₖ = kzeros(S, m) - gy₂ₖ = kzeros(S, n) + gx₂ₖ₋₁ .= zero(T) + gy₂ₖ₋₁ .= zero(T) + gx₂ₖ .= zero(T) + gy₂ₖ .= zero(T) # Compute ‖r₀‖² = (γ₁)² + (β₁)² rNorm = sqrt(γₖ^2 + βₖ^2) diff --git a/src/trilqr.jl b/src/trilqr.jl index fa74d7b6c..96a7c751b 100644 --- a/src/trilqr.jl +++ b/src/trilqr.jl @@ -10,7 +10,7 @@ # Alexis Montoison, # Montreal, July 2019. -export trilqr +export trilqr, trilqr! """ (x, t, stats) = trilqr(A, b::AbstractVector{T}, c::AbstractVector{T}; @@ -32,9 +32,14 @@ USYMCG point, when it exists. The transfer is based on the residual norm. * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. """ -function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = TrilqrSolver(A, b) + trilqr!(solver, A, b, c; kwargs...) +end + +function trilqr!(solver :: TrilqrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T}; + atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) length(b) == m || error("Inconsistent problem size") @@ -44,19 +49,21 @@ function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.t, solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂ # Initial solution x₀ and residual r₀ = b - Ax₀. - x = kzeros(S, n) # x₀ + x .= zero(T) # x₀ bNorm = @knrm2(m, b) # rNorm = ‖r₀‖ # Initial solution y₀ and residual s₀ = c - Aᵀy₀. - t = kzeros(S, m) # t₀ + t .= zero(T) # t₀ cNorm = @knrm2(n, c) # sNorm = ‖s₀‖ iter = 0 @@ -73,20 +80,20 @@ function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Set up workspace. βₖ = @knrm2(m, b) # β₁ = ‖v₁‖ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ - vₖ₋₁ = kzeros(S, m) # v₀ = 0 - uₖ₋₁ = kzeros(S, n) # u₀ = 0 - vₖ = b / βₖ # v₁ = b / β₁ - uₖ = c / γₖ # u₁ = c / γ₁ + vₖ₋₁ .= zero(T) # v₀ = 0 + uₖ₋₁ .= zero(T) # u₀ = 0 + vₖ .= b ./ βₖ # v₁ = b / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(T) # Givens sines used for the LQ factorization of Tₖ - d̅ = kzeros(S, n) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ + d̅ .= zero(T) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ ζₖ₋₁ = ζbarₖ = zero(T) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(T) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(T) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations ψbarₖ₋₁ = ψₖ₋₁ = zero(T) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ ϵₖ₋₃ = λₖ₋₂ = zero(T) # Components of Lₖ₋₁ - wₖ₋₃ = kzeros(S, m) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ - wₖ₋₂ = kzeros(S, m) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ + wₖ₋₃ .= zero(T) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ + wₖ₋₂ .= zero(T) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ # Stopping criterion. inconsistent = false diff --git a/src/trimr.jl b/src/trimr.jl index dbcee7d41..5e02fa8eb 100644 --- a/src/trimr.jl +++ b/src/trimr.jl @@ -9,7 +9,7 @@ # Alexis Montoison, # Montréal, June 2020. -export trimr +export trimr, trimr! """ (x, y, stats) = trimr(A, b::AbstractVector{T}, c::AbstractVector{T}; @@ -35,7 +35,7 @@ If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is TriMR is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. - [ M O ] + [ M 0 ] [ 0 N ] indicates the weighted norm in which residuals are measured. @@ -51,10 +51,15 @@ Information will be displayed every `verbose` iterations. * A. Montoison and D. Orban, *TriCG and TriMR: Two Iterative Methods for Symmetric Quasi-Definite Systems*, Cahier du GERAD G-2020-41, GERAD, Montréal, 2020. """ -function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; - M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = TrimrSolver(A, b) + trimr!(solver, A, b, c; kwargs...) +end + +function trimr!(solver :: TrimrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T}; + M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), + spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false, + τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) length(b) == m || error("Inconsistent problem size") @@ -75,28 +80,31 @@ function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") MisI || (eltype(M) == T) || error("eltype(M) ≠ $T") NisI || (eltype(N) == T) || error("eltype(N) ≠ $T") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.yₖ, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ, solver.xₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ + gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ # Initial solutions x₀ and y₀. - xₖ = kzeros(S, m) - yₖ = kzeros(S, n) + xₖ .= zero(T) + yₖ .= zero(T) iter = 0 itmax == 0 && (itmax = m+n) # Initialize preconditioned orthogonal tridiagonalization process. - M⁻¹vₖ₋₁ = kzeros(S, m) # v₀ = 0 - N⁻¹uₖ₋₁ = kzeros(S, n) # u₀ = 0 + M⁻¹vₖ₋₁ .= zero(T) # v₀ = 0 + N⁻¹uₖ₋₁ .= zero(T) # u₀ = 0 # β₁Ev₁ = b ↔ β₁v₁ = Mb - M⁻¹vₖ = copy(b) + M⁻¹vₖ .= b vₖ = M * M⁻¹vₖ βₖ = sqrt(@kdot(m, vₖ, M⁻¹vₖ)) # β₁ = ‖v₁‖_E if βₖ ≠ 0 @@ -105,7 +113,7 @@ function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; end # γ₁Fu₁ = c ↔ γ₁u₁ = Nb - N⁻¹uₖ = copy(c) + N⁻¹uₖ .= c uₖ = N * N⁻¹uₖ γₖ = sqrt(@kdot(n, uₖ, N⁻¹uₖ)) # γ₁ = ‖u₁‖_F if γₖ ≠ 0 @@ -114,14 +122,14 @@ function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; end # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ. - gx₂ₖ₋₃ = kzeros(S, m) - gy₂ₖ₋₃ = kzeros(S, n) - gx₂ₖ₋₂ = kzeros(S, m) - gy₂ₖ₋₂ = kzeros(S, n) - gx₂ₖ₋₁ = kzeros(S, m) - gy₂ₖ₋₁ = kzeros(S, n) - gx₂ₖ = kzeros(S, m) - gy₂ₖ = kzeros(S, n) + gx₂ₖ₋₃ .= zero(T) + gy₂ₖ₋₃ .= zero(T) + gx₂ₖ₋₂ .= zero(T) + gy₂ₖ₋₂ .= zero(T) + gx₂ₖ₋₁ .= zero(T) + gy₂ₖ₋₁ .= zero(T) + gx₂ₖ .= zero(T) + gy₂ₖ .= zero(T) # Compute ‖r₀‖² = (γ₁)² + (β₁)² rNorm = sqrt(γₖ^2 + βₖ^2) diff --git a/src/usymlq.jl b/src/usymlq.jl index b6b521088..1db2ca35f 100644 --- a/src/usymlq.jl +++ b/src/usymlq.jl @@ -17,7 +17,7 @@ # Alexis Montoison, # Montreal, November 2018. -export usymlq +export usymlq, usymlq! """ (x, stats) = usymlq(A, b::AbstractVector{T}, c::AbstractVector{T}; @@ -42,9 +42,14 @@ when it exists. The transfer is based on the residual norm. * A. Buttari, D. Orban, D. Ruiz and D. Titley-Peloquin, *A tridiagonalization method for symmetric saddle-point and quasi-definite systems*, SIAM Journal on Scientific Computing, 41(5), pp. 409--432, 2019. * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. """ -function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = UsymlqSolver(A, b) + usymlq!(solver, A, b, c; kwargs...) +end + +function usymlq!(solver :: UsymlqSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T}; + atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) length(b) == m || error("Inconsistent problem size") @@ -53,16 +58,18 @@ function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + uₖ₋₁, uₖ, x, d̅, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.x, solver.d̅, solver.vₖ₋₁, solver.vₖ # Initial solution x₀ and residual norm ‖r₀‖. - x = kzeros(S, n) - bNorm = @knrm2(m, b) # ‖r₀‖ + x .= zero(T) + bNorm = @knrm2(m, b) bNorm == 0 && return (x, SimpleStats(true, false, [bNorm], T[], "x = 0 is a zero-residual solution")) iter = 0 @@ -73,16 +80,15 @@ function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T}; (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") display(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) - # Set up workspace. βₖ = @knrm2(m, b) # β₁ = ‖v₁‖ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ - vₖ₋₁ = kzeros(S, m) # v₀ = 0 - uₖ₋₁ = kzeros(S, n) # u₀ = 0 - vₖ = b / βₖ # v₁ = b / β₁ - uₖ = c / γₖ # u₁ = c / γ₁ + vₖ₋₁ .= zero(T) # v₀ = 0 + uₖ₋₁ .= zero(T) # u₀ = 0 + vₖ .= b ./ βₖ # v₁ = b / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(T) # Givens sines used for the LQ factorization of Tₖ - d̅ = kzeros(S, n) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ + d̅ .= zero(T) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ ζₖ₋₁ = ζbarₖ = zero(T) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(T) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(T) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations diff --git a/src/usymqr.jl b/src/usymqr.jl index bb739b9dc..6cf3d7b7a 100644 --- a/src/usymqr.jl +++ b/src/usymqr.jl @@ -17,7 +17,7 @@ # Alexis Montoison, # Montreal, November 2018. -export usymqr +export usymqr, usymqr! """ (x, stats) = usymqr(A, b::AbstractVector{T}, c::AbstractVector{T}; @@ -39,9 +39,14 @@ USYMQR finds the minimum-norm solution if problems are inconsistent. * A. Buttari, D. Orban, D. Ruiz and D. Titley-Peloquin, *A tridiagonalization method for symmetric saddle-point and quasi-definite systems*, SIAM Journal on Scientific Computing, 41(5), pp. 409--432, 2019. * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. """ -function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat +function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat + solver = UsymqrSolver(A, b) + usymqr!(solver, A, b, c; kwargs...) +end + +function usymqr!(solver :: UsymqrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T}; + atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat} m, n = size(A) length(b) == m || error("Inconsistent problem size") @@ -50,15 +55,17 @@ function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; # Check type consistency eltype(A) == T || error("eltype(A) ≠ $T") + ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(c) == S || error("ktypeof(c) ≠ $S") # Compute the adjoint of A Aᵀ = A' - # Determine the storage type of b - S = typeof(b) + # Set up workspace. + vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ = solver.vₖ₋₁, solver.vₖ, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ # Initial solution x₀ and residual norm ‖r₀‖. - x = kzeros(S, n) + x .= zero(T) rNorm = @knrm2(m, b) rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution") @@ -72,17 +79,16 @@ function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖") display(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗") - # Set up workspace. βₖ = @knrm2(m, b) # β₁ = ‖v₁‖ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ - vₖ₋₁ = kzeros(S, m) # v₀ = 0 - uₖ₋₁ = kzeros(S, n) # u₀ = 0 - vₖ = b / βₖ # v₁ = b / β₁ - uₖ = c / γₖ # u₁ = c / γ₁ + vₖ₋₁ .= zero(T) # v₀ = 0 + uₖ₋₁ .= zero(T) # u₀ = 0 + vₖ .= b ./ βₖ # v₁ = b / β₁ + uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₂ = cₖ₋₁ = cₖ = zero(T) # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ sₖ₋₂ = sₖ₋₁ = sₖ = zero(T) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ - wₖ₋₂ = kzeros(S, n) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ - wₖ₋₁ = kzeros(S, n) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ + wₖ₋₂ .= zero(T) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ + wₖ₋₁ .= zero(T) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ # Stopping criterion. diff --git a/src/variants.jl b/src/variants.jl index fb4049433..33a560b48 100644 --- a/src/variants.jl +++ b/src/variants.jl @@ -28,49 +28,31 @@ end # Variants where matrix-vector products with A and Aᵀ are required for fn in (:cgls, :cgne, :lnlq, :craig, :craigmr, :crls, :crmr, :lslq, :lsqr, :lsmr, :bilq, :qmr) @eval begin - $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A), convert(Vector{T}, b); wrap_preconditioners(kwargs, Vector{T})...) - $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A, storagetype=typeof(b)), b; wrap_preconditioners(kwargs, typeof(b))...) + $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b)), b; wrap_preconditioners(kwargs, ktypeof(b))...) end end # Variants for USYMLQ, USYMQR, TriCG, TriMR, TriLQR and BiLQR for fn in (:usymlq, :usymqr, :tricg, :trimr, :trilqr, :bilqr) @eval begin - $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}, c :: SparseVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A), convert(Vector{T}, b), convert(Vector{T}, c); wrap_preconditioners(kwargs, Vector{T})...) - - $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}, c :: SparseVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A), b, convert(Vector{T}, c); wrap_preconditioners(kwargs, Vector{T})...) - - $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A), convert(Vector{T}, b), c; wrap_preconditioners(kwargs, Vector{T})...) - $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A, storagetype=typeof(b)), b, c; wrap_preconditioners(kwargs, typeof(b))...) + $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b)), b, c; wrap_preconditioners(kwargs, ktypeof(b))...) end end # Variants where matrix-vector products with A are only required for fn in (:cg_lanczos, :cg, :cr, :minres, :minres_qlp, :symmlq, :cgs, :bicgstab, :diom, :dqgmres) @eval begin - $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A, symmetric=true), convert(Vector{T}, b); wrap_preconditioners(kwargs, Vector{T})...) - $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A, storagetype=typeof(b), symmetric=true), b; wrap_preconditioners(kwargs, typeof(b))...) + $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b), symmetric=true), b; wrap_preconditioners(kwargs, ktypeof(b))...) end end # Variants for CG-LANCZOS-SHIFT-SEQ for fn in [:cg_lanczos_shift_seq] @eval begin - $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}, shifts :: AbstractVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A, symmetric=true), convert(Vector{T}, b), shifts; wrap_preconditioners(kwargs, Vector{T})...) - $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}, shifts :: AbstractVector{T}; kwargs...) where T <: AbstractFloat = - $fn(PreallocatedLinearOperator(A, storagetype=typeof(b), symmetric=true), b, shifts; wrap_preconditioners(kwargs, typeof(b))...) + $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b), symmetric=true), b, shifts; wrap_preconditioners(kwargs, ktypeof(b))...) end end diff --git a/test/test_alloc.jl b/test/test_alloc.jl index 421cf1587..b5e2eb146 100644 --- a/test/test_alloc.jl +++ b/test/test_alloc.jl @@ -30,6 +30,11 @@ function test_alloc() actual_symmlq_bytes = @allocated symmlq(A, b) @test actual_symmlq_bytes ≤ 1.1 * expected_symmlq_bytes + solver = SymmlqSolver(A, b) + symmlq!(solver, A, b) # warmup + inplace_symmlq_bytes = @allocated symmlq!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_symmlq_bytes == 672) + # without preconditioner and with Ap preallocated, CG needs 3 n-vectors: x, r, p storage_cg(n) = 3 * n storage_cg_bytes(n) = 8 * storage_cg(n) @@ -39,6 +44,11 @@ function test_alloc() actual_cg_bytes = @allocated cg(A, b) @test actual_cg_bytes ≤ 1.1 * expected_cg_bytes + solver = CgSolver(A, b) + cg!(solver, A, b) # warmup + inplace_cg_bytes = @allocated cg!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_cg_bytes == 208) + # without preconditioner and with Ap preallocated, MINRES needs 5 n-vectors: x, r1, r2, w1, w2 storage_minres(n) = 5 * n storage_minres_bytes(n) = 8 * storage_minres(n) @@ -67,6 +77,11 @@ function test_alloc() actual_diom_bytes = @allocated diom(A, b, memory=mem) @test actual_diom_bytes ≤ 1.05 * expected_diom_bytes + solver = DiomSolver(A, b) + diom!(solver, A, b) # warmup + inplace_diom_bytes = @allocated diom!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_diom_bytes == 208) + # with Ap preallocated, CG_LANCZOS needs 4 n-vectors: x, v, v_prev, p storage_cg_lanczos(n) = 4 * n storage_cg_lanczos_bytes(n) = 8 * storage_cg_lanczos(n) @@ -76,12 +91,17 @@ function test_alloc() actual_cg_lanczos_bytes = @allocated cg_lanczos(A, b) @test actual_cg_lanczos_bytes ≤ 1.1 * expected_cg_lanczos_bytes + solver = CgLanczosSolver(A, b) + cg_lanczos!(solver, A, b) # warmup + inplace_cg_lanczos_bytes = @allocated cg_lanczos!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_cg_lanczos_bytes == 144) + # with Ap preallocated, CG_LANCZOS_SHIFT_SEQ needs: # - 2 n-vectors: v, v_prev # - 2 (n*nshifts)-matrices: x, p # - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms - # - 2 nshifts-bitArray: indefinite, converged - storage_cg_lanczos_shift_seq(n, nshifts) = (2 * n) + (2 * n * nshifts) + (5 * nshifts) + (2 * nshifts / 64) + # - 3 nshifts-bitArray: indefinite, converged, not_cv + storage_cg_lanczos_shift_seq(n, nshifts) = (2 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64) storage_cg_lanczos_shift_seq_bytes(n, nshifts) = 8 * storage_cg_lanczos_shift_seq(n, nshifts) expected_cg_lanczos_shift_seq_bytes = storage_cg_lanczos_shift_seq_bytes(n, nshifts) @@ -89,6 +109,12 @@ function test_alloc() actual_cg_lanczos_shift_seq_bytes = @allocated cg_lanczos_shift_seq(A, b, shifts) @test actual_cg_lanczos_shift_seq_bytes ≤ 1.1 * expected_cg_lanczos_shift_seq_bytes + solver = CgLanczosShiftSolver(A, b, shifts) + cg_lanczos_shift_seq!(solver, A, b, shifts) # warmup + inplace_cg_lanczos_shift_seq_bytes = @allocated cg_lanczos_shift_seq!(solver, A, b, shifts) + println(inplace_cg_lanczos_shift_seq_bytes) + @test (VERSION < v"1.5") || (inplace_cg_lanczos_shift_seq_bytes == 320) + # without preconditioner and with Ap preallocated, DQGMRES needs: # - 1 n-vector: x # - 2 (n*mem)-matrices: P, V @@ -102,6 +128,11 @@ function test_alloc() actual_dqgmres_bytes = @allocated dqgmres(A, b, memory=mem) @test actual_dqgmres_bytes ≤ 1.05 * expected_dqgmres_bytes + solver = DqgmresSolver(A, b) + dqgmres!(solver, A, b) # warmup + inplace_dqgmres_bytes = @allocated dqgmres!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_dqgmres_bytes == 208) + # without preconditioner and with Ap preallocated, CR needs 4 n-vectors: x, r, p, q storage_cr(n) = 4 * n storage_cr_bytes(n) = 8 * storage_cr(n) @@ -111,6 +142,11 @@ function test_alloc() actual_cr_bytes = @allocated cr(A, b, rtol=1e-6) @test actual_cr_bytes ≤ 1.1 * expected_cr_bytes + solver = CrSolver(A, b) + cr!(solver, A, b, rtol=1e-6) # warmup + inplace_cr_bytes = @allocated cr!(solver, A, b, rtol=1e-6) + @test (VERSION < v"1.5") || (inplace_cr_bytes == 208) + # without preconditioner and with (Ap, Aᵀq) preallocated, CRMR needs: # - 2 n-vectors: x, p # - 1 m-vector: r @@ -122,6 +158,11 @@ function test_alloc() actual_crmr_bytes = @allocated crmr(Au, c) @test actual_crmr_bytes ≤ 1.1 * expected_crmr_bytes + solver = CrmrSolver(Au, c) + crmr!(solver, Au, c) # warmup + inplace_crmr_bytes = @allocated crmr!(solver, Au, c) + @test (VERSION < v"1.5") || (inplace_crmr_bytes == 208) + # without preconditioner and with Ap preallocated, CGS needs 5 n-vectors: x, r, u, p, q storage_cgs(n) = 5 * n storage_cgs_bytes(n) = 8 * storage_cgs(n) @@ -131,6 +172,11 @@ function test_alloc() actual_cgs_bytes = @allocated cgs(A, b) @test actual_cgs_bytes ≤ 1.1 * expected_cgs_bytes + solver = CgsSolver(A, b) + cgs!(solver, A, b) # warmup + inplace_cgs_bytes = @allocated cgs!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_cgs_bytes == 208) + # without preconditioner and with Ap preallocated, BICGSTAB needs 5 n-vectors: x, r, p, v, s storage_bicgstab(n) = 5 * n storage_bicgstab_bytes(n) = 8 * storage_bicgstab(n) @@ -140,6 +186,11 @@ function test_alloc() actual_bicgstab_bytes = @allocated bicgstab(A, b) @test actual_bicgstab_bytes ≤ 1.1 * expected_bicgstab_bytes + solver = BicgstabSolver(A, b) + bicgstab!(solver, A, b) # warmup + inplace_bicgstab_bytes = @allocated bicgstab!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_bicgstab_bytes == 208) + # with (Ap, Aᵀq) preallocated, CRAIGMR needs: # - 2 n-vector: x, v # - 4 m-vectors: y, u, w, wbar @@ -151,6 +202,11 @@ function test_alloc() actual_craigmr_bytes = @allocated craigmr(Au, c) @test actual_craigmr_bytes ≤ 1.1 * expected_craigmr_bytes + solver = CraigmrSolver(Au, c) + craigmr!(solver, Au, c) # warmup + inplace_craigmr_bytes = @allocated craigmr!(solver, Au, c) + @test (VERSION < v"1.5") || (inplace_craigmr_bytes == 208) + # without preconditioner and with (Ap, Aᵀq) preallocated, CGNE needs: # - 2 n-vectors: x, p # - 1 m-vector: r @@ -162,6 +218,11 @@ function test_alloc() actual_cgne_bytes = @allocated cgne(Au, c) @test actual_cgne_bytes ≤ 1.1 * expected_cgne_bytes + solver = CgneSolver(Au, c) + cgne!(solver, Au, c) # warmup + inplace_cgne_bytes = @allocated cgne!(solver, Au, c) + @test (VERSION < v"1.5") || (inplace_cgne_bytes == 208) + # with (Ap, Aᵀq) preallocated, LNLQ needs: # - 2 n-vector: x, v # - 3 m-vectors: y, w̄, u @@ -173,6 +234,11 @@ function test_alloc() actual_lnlq_bytes = @allocated lnlq(Au, c) @test actual_lnlq_bytes ≤ 1.1 * expected_lnlq_bytes + solver = LnlqSolver(Au, c) + lnlq!(solver, Au, c) # warmup + inplace_lnlq_bytes = @allocated lnlq!(solver, Au, c) + @test (VERSION < v"1.5") || (inplace_lnlq_bytes == 208) + # with (Ap, Aᵀq) preallocated, CRAIG needs: # - 2 n-vector: x, v # - 3 m-vectors: y, w, u @@ -184,6 +250,11 @@ function test_alloc() actual_craig_bytes = @allocated craig(Au, c) @test actual_craig_bytes ≤ 1.1 * expected_craig_bytes + solver = CraigSolver(Au, c) + craig!(solver, Au, c) # warmup + inplace_craig_bytes = @allocated craig!(solver, Au, c) + @test (VERSION < v"1.5") || (inplace_craig_bytes == 208) + # without preconditioner and with (Ap, Aᵀq) preallocated, LSLQ needs: # - 3 m-vectors: x_lq, v, w̄ (= x_cg) # - 1 n-vector: u @@ -195,6 +266,11 @@ function test_alloc() actual_lslq_bytes = @allocated lslq(Ao, b) @test actual_lslq_bytes ≤ 1.1 * expected_lslq_bytes + solver = LslqSolver(Ao, b) + lslq!(solver, Ao, b) # warmup + inplace_lslq_bytes = @allocated lslq!(solver, Ao, b) + @test (VERSION < v"1.5") || (inplace_lslq_bytes == 576) + # without preconditioner and with (Ap, Aᵀq) preallocated, CGLS needs: # - 2 m-vectors: x, p # - 1 n-vector: r @@ -206,6 +282,11 @@ function test_alloc() actual_cgls_bytes = @allocated cgls(Ao, b) @test actual_cgls_bytes ≤ 1.1 * expected_cgls_bytes + solver = CglsSolver(Ao, b) + cgls!(solver, Ao, b) # warmup + inplace_cgls_bytes = @allocated cgls!(solver, Ao, b) + @test (VERSION < v"1.5") || (inplace_cgls_bytes == 208) + # without preconditioner and with (Ap, Aᵀq) preallocated, LSQR needs: # - 3 m-vectors: x, v, w # - 1 n-vector: u @@ -217,6 +298,11 @@ function test_alloc() actual_lsqr_bytes = @allocated lsqr(Ao, b) @test actual_lsqr_bytes ≤ 1.1 * expected_lsqr_bytes + solver = LsqrSolver(Ao, b) + lsqr!(solver, Ao, b) # warmup + inplace_lsqr_bytes = @allocated lsqr!(solver, Ao, b) + @test (VERSION < v"1.5") || (inplace_lsqr_bytes == 432) + # without preconditioner and with (Ap, Aᵀq) preallocated, CRLS needs: # - 3 m-vectors: x, p, Ar # - 2 n-vector: r, Ap @@ -228,6 +314,11 @@ function test_alloc() actual_crls_bytes = @allocated crls(Ao, b) @test actual_crls_bytes ≤ 1.1 * expected_crls_bytes + solver = CrlsSolver(Ao, b) + crls!(solver, Ao, b) # warmup + inplace_crls_bytes = @allocated crls!(solver, Ao, b) + @test (VERSION < v"1.5") || (inplace_crls_bytes == 208) + # without preconditioner and with (Ap, Aᵀq) preallocated, LSMR needs: # - 4 m-vectors: x, v, h, hbar # - 1 n-vector: u @@ -239,6 +330,11 @@ function test_alloc() actual_lsmr_bytes = @allocated lsmr(Ao, b) @test actual_lsmr_bytes ≤ 1.1 * expected_lsmr_bytes + solver = LsmrSolver(Ao, b) + lsmr!(solver, Ao, b) # warmup + inplace_lsmr_bytes = @allocated lsmr!(solver, Ao, b) + @test (VERSION < v"1.5") || (inplace_lsmr_bytes == 336) + # with (Ap, Aᵀq) preallocated, USYMQR needs: # - 5 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ # - 2 n-vectors: uₖ₋₁, uₖ @@ -250,6 +346,11 @@ function test_alloc() actual_usymqr_bytes = @allocated usymqr(Ao, b, c) @test actual_usymqr_bytes ≤ 1.1 * expected_usymqr_bytes + solver = UsymqrSolver(Ao, b) + usymqr!(solver, Ao, b, c) # warmup + inplace_usymqr_bytes = @allocated usymqr!(solver, Ao, b, c) + @test (VERSION < v"1.5") || (inplace_usymqr_bytes == 208) + # with (Ap, Aᵀq) preallocated, TRILQR needs: # - 5 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ # - 4 n-vectors: uₖ₋₁, uₖ, x, d̅ @@ -261,6 +362,11 @@ function test_alloc() actual_trilqr_bytes = @allocated trilqr(A, b, b) @test actual_trilqr_bytes ≤ 1.1 * expected_trilqr_bytes + solver = TrilqrSolver(A, b) + trilqr!(solver, A, b, b) # warmup + inplace_trilqr_bytes = @allocated trilqr!(solver, A, b, b) + @test (VERSION < v"1.5") || (inplace_trilqr_bytes == 208) + # with (Ap, Aᵀq) preallocated, BILQ needs: # - 6 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅ storage_bilq(n) = 6 * n @@ -271,6 +377,11 @@ function test_alloc() actual_bilq_bytes = @allocated bilq(A, b) @test actual_bilq_bytes ≤ 1.1 * expected_bilq_bytes + solver = BilqSolver(A, b) + bilq!(solver, A, b) # warmup + inplace_bilq_bytes = @allocated bilq!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_bilq_bytes == 208) + # with (Ap, Aᵀq) preallocated, BILQR needs: # - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ storage_bilqr(n) = 9 * n @@ -281,6 +392,11 @@ function test_alloc() actual_bilqr_bytes = @allocated bilqr(A, b, b) @test actual_bilqr_bytes ≤ 1.1 * expected_bilqr_bytes + solver = BilqrSolver(A, b) + bilqr!(solver, A, b, b) # warmup + inplace_bilqr_bytes = @allocated bilqr!(solver, A, b, b) + @test (VERSION < v"1.5") || (inplace_bilqr_bytes == 208) + # with Ap preallocated, MINRES-QLP needs: # - 5 n-vectors: wₖ₋₁, wₖ, vₖ₋₁, vₖ, x storage_minres_qlp(n) = 5 * n @@ -291,6 +407,11 @@ function test_alloc() actual_minres_qlp_bytes = @allocated minres_qlp(A, b) @test actual_minres_qlp_bytes ≤ 1.1 * expected_minres_qlp_bytes + solver = MinresQlpSolver(A, b) + minres_qlp!(solver, A, b) # warmup + inplace_minres_qlp_bytes = @allocated minres_qlp!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_minres_qlp_bytes == 208) + # with (Ap, Aᵗp) preallocated, QMR needs: # - 7 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ storage_qmr(n) = 7 * n @@ -301,6 +422,11 @@ function test_alloc() actual_qmr_bytes = @allocated qmr(A, b) @test actual_qmr_bytes ≤ 1.1 * expected_qmr_bytes + solver = QmrSolver(A, b) + qmr!(solver, A, b) # warmup + inplace_qmr_bytes = @allocated qmr!(solver, A, b) + @test (VERSION < v"1.5") || (inplace_qmr_bytes == 208) + # with (Ap, Aᵀp) preallocated, USYMLQ needs: # - 4 n-vectors: uₖ₋₁, uₖ, x, d̅ # - 2 m-vectors: vₖ₋₁, vₖ @@ -312,6 +438,11 @@ function test_alloc() actual_usymlq_bytes = @allocated usymlq(Au, c, b) @test actual_usymlq_bytes ≤ 1.1 * expected_usymlq_bytes + solver = UsymlqSolver(Au, c) + usymlq!(solver, Au, c, b) # warmup + inplace_usymlq_bytes = @allocated usymlq!(solver, Au, c, b) + @test (VERSION < v"1.5") || (inplace_usymlq_bytes == 208) + # with (Ap, Aᵀp) preallocated, TriCG needs: # - 5 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ # - 5 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ @@ -323,6 +454,11 @@ function test_alloc() actual_tricg_bytes = @allocated tricg(Au, c, b) @test actual_tricg_bytes ≤ 1.1 * expected_tricg_bytes + solver = TricgSolver(Au, c) + tricg!(solver, Au, c, b) # warmup + inplace_tricg_bytes = @allocated tricg!(solver, Au, c, b) + @test (VERSION < v"1.5") || (inplace_tricg_bytes == 208) + # with (Ap, Aᵀp) preallocated, TriMR needs: # - 7 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ # - 7 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ @@ -333,6 +469,11 @@ function test_alloc() trimr(Au, c, b) # warmup actual_trimr_bytes = @allocated trimr(Au, c, b) @test actual_trimr_bytes ≤ 1.1 * expected_trimr_bytes + + solver = TrimrSolver(Au, c) + trimr!(solver, Au, c, b) # warmup + inplace_trimr_bytes = @allocated trimr!(solver, Au, c, b) + @test (VERSION < v"1.5") || (inplace_trimr_bytes == 208) end @testset "alloc" begin diff --git a/test/test_aux.jl b/test/test_aux.jl index e8dcf16ab..c02981ee7 100644 --- a/test/test_aux.jl +++ b/test/test_aux.jl @@ -93,12 +93,17 @@ @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178 # test kzeros and kones - Krylov.kzeros(Vector{Float64}, 10) == zeros(10) - Krylov.kones(Vector{Float64}, 10) == ones(10) - - a = rand(10) - b = view(a, 1:4) - S = typeof(b) - Krylov.kzeros(S, 10) == zeros(10) - Krylov.kones(S, 10) == ones(10) + @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10) + @test Krylov.kones(Vector{Float64}, 10) == ones(10) + + # test ktypeof + a = sprand(Float32, 10, 0.5) + b = view(a, 4:8) + @test Krylov.ktypeof(a) == Vector{Float32} + @test Krylov.ktypeof(b) == Vector{Float32} + + a = sprand(Float64, 10, 0.5) + b = view(a, 4:8) + @test Krylov.ktypeof(a) == Vector{Float64} + @test Krylov.ktypeof(b) == Vector{Float64} end diff --git a/test/test_lslq.jl b/test/test_lslq.jl index ef11edc23..de5b1b91b 100644 --- a/test/test_lslq.jl +++ b/test/test_lslq.jl @@ -24,7 +24,7 @@ V, _ = qr(rand(4, 4)) A = U * [Σ ; zeros(2, 4)] * V' b = ones(6) - (x, x_cg, err_lbnds, err_ubnds_lq, err_ubnds_cg, stats) = lslq(A, b, σ=1 - 1.0e-10) + (x, x_cg, err_lbnds, err_ubnds_lq, err_ubnds_cg, stats) = lslq(A, b, σ=1 - 1.0e-10, history=true) @test isapprox(err_ubnds_lq[end], 0.0, atol=sqrt(eps(Float64))) @test isapprox(err_ubnds_cg[end], 0.0, atol=sqrt(eps(Float64))) x_exact = A \ b diff --git a/test/test_variants.jl b/test/test_variants.jl index ac6cbbce1..e0e6a3e87 100644 --- a/test/test_variants.jl +++ b/test/test_variants.jl @@ -9,8 +9,9 @@ A_sparse = convert(SparseMatrixCSC{T,S}, A_dense) b_dense = ones(T, 5) b_sparse = convert(SparseVector{T,S}, b_dense) + b_view = view(b_dense, 1:5) for A in (A_dense, A_sparse) - for b in (b_dense, b_sparse) + for b in (b_dense, b_sparse, b_view) if fn == :cg_lanczos_shift_seq shifts = [-one(T), one(T)] @eval $fn($A, $b, $shifts) @@ -19,7 +20,8 @@ elseif fn in (:usymlq, :usymqr, :tricg, :trimr, :trilqr, :bilqr) c_dense = ones(T, 5) c_sparse = convert(SparseVector{T,S}, c_dense) - for c in (c_dense, c_sparse) + c_view = view(c_dense, 1:5) + for c in (c_dense, c_sparse, c_view) @eval $fn($A, $b, $c) @eval $fn($transpose($A), $b, $c) @eval $fn($adjoint($A), $b, $c)