diff --git a/docs/src/api.md b/docs/src/api.md
index 419149b40..73c97bc80 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -11,8 +11,36 @@ Krylov.AdjointStats
 ## Solver Types
 
 ```@docs
-Krylov.KrylovSolver
-Krylov.MinresSolver
+KrylovSolver
+MinresSolver
+CgSolver
+CrSolver
+SymmlqSolver
+CgLanczosSolver
+CgLanczosShiftSolver
+MinresQlpSolver
+DqgmresSolver
+DiomSolver
+UsymlqSolver
+UsymqrSolver
+TricgSolver
+TrimrSolver
+TrilqrSolver
+CgsSolver
+BicgstabSolver
+BilqSolver
+QmrSolver
+BilqrSolver
+CglsSolver
+CrlsSolver
+CgneSolver
+CrmrSolver
+LslqSolver
+LsqrSolver
+LsmrSolver
+LnlqSolver
+CraigSolver
+CraigmrSolver
 ```
 
 ## Utilities
@@ -22,6 +50,7 @@ Krylov.roots_quadratic
 Krylov.sym_givens
 Krylov.to_boundary
 Krylov.vec2str
+Krylov.ktypeof
 Krylov.kzeros
 Krylov.kones
 ```
diff --git a/src/bicgstab.jl b/src/bicgstab.jl
index 715a2c79b..896664c3f 100644
--- a/src/bicgstab.jl
+++ b/src/bicgstab.jl
@@ -13,7 +13,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montréal, October 2020.
 
-export bicgstab
+export bicgstab, bicgstab!
 
 """
     (x, stats) = bicgstab(A, b::AbstractVector{T}; c::AbstractVector{T}=b,
@@ -41,9 +41,14 @@ This implementation allows a left preconditioner `M` and a right preconditioner
 * H. A. van der Vorst, *Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems*, SIAM Journal on Scientific and Statistical Computing, 13(2), pp. 631--644, 1992.
 * G. L.G. Sleijpen and D. R. Fokkema, *BiCGstab(ℓ) for linear equations involving unsymmetric matrices with complex spectrum*, Electronic Transactions on Numerical Analysis, 1, pp. 11--32, 1993.
 """
-function bicgstab(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
-                  M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
-                  itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function bicgstab(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = BicgstabSolver(A, b)
+  bicgstab!(solver, A, b; kwargs...)
+end
+
+function bicgstab!(solver :: BicgstabSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
+                   M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
+                   itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n, m = size(A)
   m == n || error("System must be square")
@@ -56,18 +61,19 @@ function bicgstab(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
-
   # Set up workspace.
-  x = kzeros(S, n)  # x₀
-  s = kzeros(S, n)  # s₀
-  v = kzeros(S, n)  # v₀
-  r = copy(M * b)   # r₀
-  p = copy(r)       # p₁
+  x, s, v, r, p = solver.x, solver.s, solver.v, solver.r, solver.p
+
+  x .= zero(T) # x₀
+  s .= zero(T) # s₀
+  v .= zero(T) # v₀
+  r .= (M * b) # r₀
+  p .= r       # p₁
 
   α = one(T) # α₀
   ω = one(T) # ω₀
diff --git a/src/bilq.jl b/src/bilq.jl
index 38ac9fc05..147a5757b 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -10,7 +10,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, February 2019.
 
-export bilq
+export bilq, bilq!
 
 """
     (x, stats) = bilq(A, b::AbstractVector{T}; c::AbstractVector{T}=b,
@@ -29,9 +29,14 @@ when it exists. The transfer is based on the residual norm.
 
 * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
 """
-function bilq(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
-              atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
-              itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function bilq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = BilqSolver(A, b)
+  bilq!(solver, A, b; kwargs...)
+end
+
+function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
+               atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
+               itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n, m = size(A)
   m == n || error("System must be square")
@@ -40,15 +45,17 @@ function bilq(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.d̅
 
   # Initial solution x₀ and residual norm ‖r₀‖.
-  x = kzeros(S, n)
+  x .= zero(T)
   bNorm = @knrm2(n, b)  # ‖r₀‖
   bNorm == 0 && return (x, SimpleStats(true, false, [bNorm], T[], "x = 0 is a zero-residual solution"))
 
@@ -64,16 +71,15 @@ function bilq(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
   bᵗc = @kdot(n, b, c)  # ⟨b,c⟩
   bᵗc == 0 && return (x, SimpleStats(false, false, [bNorm], T[], "Breakdown bᵀc = 0"))
 
-  # Set up workspace.
   βₖ = √(abs(bᵗc))           # β₁γ₁ = bᵀc
   γₖ = bᵗc / βₖ              # β₁γ₁ = bᵀc
-  vₖ₋₁ = kzeros(S, n)        # v₀ = 0
-  uₖ₋₁ = kzeros(S, n)        # u₀ = 0
-  vₖ = b / βₖ                # v₁ = b / β₁
-  uₖ = c / γₖ                # u₁ = c / γ₁
+  vₖ₋₁ .= zero(T)            # v₀ = 0
+  uₖ₋₁ .= zero(T)            # u₀ = 0
+  vₖ .= b ./ βₖ              # v₁ = b / β₁
+  uₖ .= c ./ γₖ              # u₁ = c / γ₁
   cₖ₋₁ = cₖ = -one(T)        # Givens cosines used for the LQ factorization of Tₖ
   sₖ₋₁ = sₖ = zero(T)        # Givens sines used for the LQ factorization of Tₖ
-  d̅ = kzeros(S, n)           # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
+  d̅ .= zero(T)               # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
   ζₖ₋₁ = ζbarₖ = zero(T)     # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
   ζₖ₋₂ = ηₖ = zero(T)        # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
   δbarₖ₋₁ = δbarₖ = zero(T)  # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
diff --git a/src/bilqr.jl b/src/bilqr.jl
index c44675ac2..8da8ee095 100644
--- a/src/bilqr.jl
+++ b/src/bilqr.jl
@@ -10,7 +10,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, July 2019.
 
-export bilqr
+export bilqr, bilqr!
 
 """
     (x, t, stats) = bilqr(A, b::AbstractVector{T}, c::AbstractVector{T};
@@ -32,9 +32,14 @@ BiCG point, when it exists. The transfer is based on the residual norm.
 
 * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
 """
-function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
-               atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
-               itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = BilqrSolver(A, b)
+  bilqr!(solver, A, b, c; kwargs...)
+end
+
+function bilqr!(solver :: BilqrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T};
+                atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
+                itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n, m = size(A)
   m == n || error("Systems must be square")
@@ -44,19 +49,21 @@ function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.t, solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂
 
   # Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖.
-  x = kzeros(S, n)      # x₀
+  x .= zero(T)          # x₀
   bNorm = @knrm2(n, b)  # rNorm = ‖r₀‖
 
   # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀt₀‖.
-  t = kzeros(S, n)      # t₀
+  t .= zero(T)          # t₀
   cNorm = @knrm2(n, c)  # sNorm = ‖s₀‖
 
   iter = 0
@@ -76,21 +83,21 @@ function bilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   # Set up workspace.
   βₖ = √(abs(bᵗc))           # β₁γ₁ = bᵀc
   γₖ = bᵗc / βₖ              # β₁γ₁ = bᵀc
-  vₖ₋₁ = kzeros(S, n)        # v₀ = 0
-  uₖ₋₁ = kzeros(S, n)        # u₀ = 0
-  vₖ = b / βₖ                # v₁ = b / β₁
-  uₖ = c / γₖ                # u₁ = c / γ₁
+  vₖ₋₁ .= zero(T)            # v₀ = 0
+  uₖ₋₁ .= zero(T)            # u₀ = 0
+  vₖ .= b ./ βₖ              # v₁ = b / β₁
+  uₖ .= c ./ γₖ              # u₁ = c / γ₁
   cₖ₋₁ = cₖ = -one(T)        # Givens cosines used for the LQ factorization of Tₖ
   sₖ₋₁ = sₖ = zero(T)        # Givens sines used for the LQ factorization of Tₖ
-  d̅ = kzeros(S, n)           # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
+  d̅ .= zero(T)               # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
   ζₖ₋₁ = ζbarₖ = zero(T)     # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
   ζₖ₋₂ = ηₖ = zero(T)        # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
   δbarₖ₋₁ = δbarₖ = zero(T)  # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
   ψbarₖ₋₁ = ψₖ₋₁ = zero(T)   # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁
   norm_vₖ = bNorm / βₖ       # ‖vₖ‖ is used for residual norm estimates
   ϵₖ₋₃ = λₖ₋₂ = zero(T)      # Components of Lₖ₋₁
-  wₖ₋₃ = kzeros(S, n)        # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ
-  wₖ₋₂ = kzeros(S, n)        # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ
+  wₖ₋₃ .= zero(T)            # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ
+  wₖ₋₂ .= zero(T)            # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ
   τₖ = zero(T)               # τₖ is used for the dual residual norm estimate
 
   # Stopping criterion.
diff --git a/src/cg.jl b/src/cg.jl
index e839f2e41..723ee168b 100644
--- a/src/cg.jl
+++ b/src/cg.jl
@@ -13,7 +13,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Salt Lake City, UT, March 2015.
 
-export cg
+export cg, cg!
 
 
 """
@@ -37,10 +37,15 @@ with `n = length(b)`.
 
 * M. R. Hestenes and E. Stiefel, *Methods of conjugate gradients for solving linear systems*, Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952.
 """
-function cg(A, b :: AbstractVector{T};
-            M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
-            itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false,
-            verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function cg(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CgSolver(A, b)
+  cg!(solver, A, b; kwargs...)
+end
+
+function cg!(solver :: CgSolver{T,S}, A, b :: AbstractVector{T};
+             M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
+             itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false,
+             verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0")
 
@@ -50,16 +55,16 @@ function cg(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, r, p = solver.x, solver.r, solver.p
 
-  # Initial state.
-  x = kzeros(S, n)
-  r = copy(b)
+  x .= zero(T)
+  r .= b
   z = M * r
-  p = copy(z)
+  p .= z
   γ = @kdot(n, r, z)
   γ == 0 && return x, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution")
 
diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl
index 1f44a04b7..07e632c64 100644
--- a/src/cg_lanczos.jl
+++ b/src/cg_lanczos.jl
@@ -11,7 +11,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Princeton, NJ, March 2015.
 
-export cg_lanczos, cg_lanczos_shift_seq
+export cg_lanczos, cg_lanczos!, cg_lanczos_shift_seq, cg_lanczos_shift_seq!
 
 
 """
@@ -34,37 +34,43 @@ assumed to be symmetric and positive definite.
 * A. Frommer and P. Maass, *Fast CG-Based Methods for Tikhonov-Phillips Regularization*, SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999.
 * C. C. Paige and M. A. Saunders, *Solution of Sparse Indefinite Systems of Linear Equations*, SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975.
 """
-function cg_lanczos(A, b :: AbstractVector{T};
-                    M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
-                    check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function cg_lanczos(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CgLanczosSolver(A, b)
+  cg_lanczos!(solver, A, b; kwargs...)
+end
+
+function cg_lanczos!(solver :: CgLanczosSolver{T,S}, A, b :: AbstractVector{T};
+                     M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+                     check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n = size(b, 1)
   (size(A, 1) == n & size(A, 2) == n) || error("Inconsistent problem size")
   (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n)
 
-  # Determine the storage type of b
-  S = typeof(b)
-
   # Tests M == Iₙ
   MisI = isa(M, opEye)
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
+  # Set up workspace.
+  x, Mv, Mv_prev, p = solver.x, solver.Mv, solver.Mv_prev, solver.p
+
   # Initial state.
-  x = kzeros(S, n)          # x₀
-  Mv = copy(b)              # Mv₁ ← b
+  x .= zero(T)              # x₀
+  Mv .= b                   # Mv₁ ← b
   v = M * Mv                # v₁ = M⁻¹ * Mv₁
   β = sqrt(@kdot(n, v, Mv)) # β₁ = v₁ᵀ M v₁
   β == 0 && return x, LanczosStats(true, [zero(T)], false, zero(T), zero(T), "x = 0 is a zero-residual solution")
-  p = copy(v)
+  p .= v
 
   # Initialize Lanczos process.
   # β₁Mv₁ = b
   @kscal!(n, one(T)/β, v)          # v₁  ←  v₁ / β₁
   MisI || @kscal!(n, one(T)/β, Mv) # Mv₁ ← Mv₁ / β₁
-  Mv_prev = copy(Mv)
+  Mv_prev .= Mv
 
   iter = 0
   itmax == 0 && (itmax = 2 * n)
@@ -150,9 +156,14 @@ The method does _not_ abort if A + αI is not definite.
 A preconditioner M may be provided in the form of a linear operator and is
 assumed to be symmetric and positive definite.
 """
-function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVector{T};
-                              M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
-                              check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CgLanczosShiftSolver(A, b, shifts)
+  cg_lanczos_shift_seq!(solver, A, b, shifts; kwargs...)
+end
+
+function cg_lanczos_shift_seq!(solver :: CgLanczosShiftSolver{T,S}, A, b :: AbstractVector{T}, shifts :: AbstractVector{T};
+                               M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+                               check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n = size(b, 1)
   (size(A, 1) == n & size(A, 2) == n) || error("Inconsistent problem size")
@@ -160,52 +171,62 @@ function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVecto
   nshifts = size(shifts, 1)
   (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts)
 
-  # Determine the storage type of b
-  S = typeof(b)
-
   # Tests M == Iₙ
   MisI = isa(M, opEye)
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(shifts) == S || error("ktypeof(shifts) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
+  # Set up workspace.
+  Mv, Mv_prev, x, p, σ, δhat, ω, γ = solver.Mv, solver.Mv_prev, solver.x, solver.p, solver.σ, solver.δhat, solver.ω, solver.γ
+  rNorms, indefinite, converged, not_cv = solver.rNorms, solver.indefinite, solver.converged, solver.not_cv
+
   # Initial state.
   ## Distribute x similarly to shifts.
-  x = [kzeros(S, n) for i = 1 : nshifts]  # x₀
-  Mv = copy(b)                            # Mv₁ ← b
+  for i = 1 : nshifts
+    x[i] .= zero(T)                       # x₀
+  end
+  Mv .= b                                 # Mv₁ ← b
   v = M * Mv                              # v₁ = M⁻¹ * Mv₁
   β = sqrt(@kdot(n, v, Mv))               # β₁ = v₁ᵀ M v₁
   β == 0 && return x, LanczosStats(true, [zero(T)], false, zero(T), zero(T), "x = 0 is a zero-residual solution")
 
   # Initialize each p to v.
-  p = [copy(v) for i = 1 : nshifts]
+  for i = 1 : nshifts
+    p[i] .= v
+  end
 
   # Initialize Lanczos process.
   # β₁Mv₁ = b
   @kscal!(n, one(T)/β, v)          # v₁  ←  v₁ / β₁
   MisI || @kscal!(n, one(T)/β, Mv) # Mv₁ ← Mv₁ / β₁
-  Mv_prev = copy(Mv)
+  Mv_prev .= Mv
 
   # Initialize some constants used in recursions below.
   ρ = one(T)
-  σ = β * ones(T, nshifts)
-  δhat = zeros(T, nshifts)
-  ω = zeros(T, nshifts)
-  γ = ones(T, nshifts)
+  σ .= β
+  δhat .= zero(T)
+  ω .= zero(T)
+  γ .= one(T)
 
   # Define stopping tolerance.
-  rNorms = β * ones(T, nshifts)
+  rNorms .= β
   rNorms_history = history ? [rNorms;] : T[]
   ε = atol + rtol * β
 
   # Keep track of shifted systems that have converged.
-  converged = rNorms .≤ ε
+  for i = 1 : nshifts
+    converged[i] = rNorms[i] ≤ ε
+    not_cv[i] = !converged[i]
+  end
   iter = 0
   itmax == 0 && (itmax = 2 * n)
 
   # Keep track of shifted systems with negative curvature if required.
-  indefinite = falses(nshifts)
+  indefinite .= false
 
   # Build format strings for printing.
   if display(iter, verbose)
@@ -215,7 +236,7 @@ function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVecto
     local_printf(iter, rNorms...)
   end
 
-  solved = all(converged)
+  solved = sum(not_cv) == 0
   tired = iter ≥ itmax
   status = "unknown"
 
@@ -241,34 +262,39 @@ function cg_lanczos_shift_seq(A, b :: AbstractVector{T}, shifts :: AbstractVecto
     MisI || (ρ = @kdot(n, v, v))
     for i = 1 : nshifts
       δhat[i] = δ + ρ * shifts[i]
-      γ[i] = 1 ./ (δhat[i] - ω[i] ./ γ[i])
+      γ[i] = 1 / (δhat[i] - ω[i] / γ[i])
+    end
+    for i = 1 : nshifts
+      indefinite[i] |= γ[i] ≤ 0
     end
-    indefinite .|= (γ .≤ 0)
 
     # Compute next CG iterate for each shifted system that has not yet converged.
     # Stop iterating on indefinite problems if requested.
-    not_cv = check_curvature ? findall(.! (converged .| indefinite)) : findall(.! converged)
-
-    for i in not_cv
-      @kaxpy!(n, γ[i], p[i], x[i])
-      ω[i] = β * γ[i]
-      σ[i] *= -ω[i]
-      ω[i] *= ω[i]
-      @kaxpby!(n, σ[i], v, ω[i], p[i])
-
-      # Update list of systems that have converged.
-      rNorms[i] = abs(σ[i])
-      converged[i] = rNorms[i] ≤ ε
+    for i = 1 : nshifts
+      not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
+      if not_cv[i]
+        @kaxpy!(n, γ[i], p[i], x[i])
+        ω[i] = β * γ[i]
+        σ[i] *= -ω[i]
+        ω[i] *= ω[i]
+        @kaxpby!(n, σ[i], v, ω[i], p[i])
+
+        # Update list of systems that have not converged.
+        rNorms[i] = abs(σ[i])
+        converged[i] = rNorms[i] ≤ ε
+      end
     end
 
     length(not_cv) > 0 && history && append!(rNorms_history, rNorms)
 
     # Is there a better way than to update this array twice per iteration?
-    not_cv = check_curvature ? findall(.! (converged .| indefinite)) : findall(.! converged)
+    for i = 1 : nshifts
+      not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
+    end
     iter = iter + 1
     display(iter, verbose) && local_printf(iter, rNorms...)
 
-    solved = length(not_cv) == 0
+    solved = sum(not_cv) == 0
     tired = iter ≥ itmax
   end
   (verbose > 0) && @printf("\n")
diff --git a/src/cgls.jl b/src/cgls.jl
index d79339158..ea873570c 100644
--- a/src/cgls.jl
+++ b/src/cgls.jl
@@ -26,7 +26,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Princeton, NJ, March 2015.
 
-export cgls
+export cgls, cgls!
 
 
 """
@@ -54,9 +54,14 @@ but simpler to implement.
 * M. R. Hestenes and E. Stiefel. *Methods of conjugate gradients for solving linear systems*, Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952.
 * A. Björck, T. Elfving and Z. Strakos, *Stability of Conjugate Gradient and Lanczos Methods for Linear Least Squares Problems*, SIAM Journal on Matrix Analysis and Applications, 19(3), pp. 720--736, 1998.
 """
-function cgls(A, b :: AbstractVector{T};
-              M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
-              radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function cgls(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CglsSolver(A, b)
+  cgls!(solver, A, b; kwargs...)
+end
+
+function cgls!(solver :: CglsSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+               radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -64,21 +69,22 @@ function cgls(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || eltype(M) == T || error("eltype(M) ≠ $T")
 
-  # Compute Aᵀ
+  # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, p, r = solver.x, solver.p, solver.r
 
-  x = kzeros(S, n)
-  r = copy(b)
+  x .= zero(T)
+  r .= b
   bNorm = @knrm2(m, r)   # Marginally faster than norm(b)
   bNorm == 0 && return x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution")
   Mr = M * r
   s = Aᵀ * Mr
-  p = copy(s)
+  p .= s
   γ = @kdot(n, s, s)  # Faster than γ = dot(s, s)
   iter = 0
   itmax == 0 && (itmax = m + n)
diff --git a/src/cgne.jl b/src/cgne.jl
index 890850155..78651835e 100644
--- a/src/cgne.jl
+++ b/src/cgne.jl
@@ -26,7 +26,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Montréal, QC, April 2015.
 
-export cgne
+export cgne, cgne!
 
 
 """
@@ -63,9 +63,14 @@ A preconditioner M may be provided in the form of a linear operator.
 * J. E. Craig, *The N-step iteration procedures*, Journal of Mathematics and Physics, 34(1), pp. 64--73, 1955.
 * J. E. Craig, *Iterations Procedures for Simultaneous Equations*, Ph.D. Thesis, Department of Electrical Engineering, MIT, 1954.
 """
-function cgne(A, b :: AbstractVector{T};
-              M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
-              itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function cgne(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CgneSolver(A, b)
+  cgne!(solver, A, b; kwargs...)
+end
+
+function cgne!(solver :: CgneSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+               itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -73,16 +78,17 @@ function cgne(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, p, r = solver.x, solver.p, solver.r
 
-  x = kzeros(S, n)
-  r = copy(b)
+  x .= zero(T)
+  r .= b
   z = M * r
   rNorm = @knrm2(m, r)   # Marginally faster than norm(r)
   rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution")
@@ -91,7 +97,7 @@ function cgne(A, b :: AbstractVector{T};
   # The following vector copy takes care of the case where A is a LinearOperator
   # with preallocation, so as to avoid overwriting vectors used later. In other
   # case, this should only add minimum overhead.
-  p = copy(Aᵀ * z)
+  p .= Aᵀ * z
 
   # Use ‖p‖ to detect inconsistent system.
   # An inconsistent system will necessarily have AA' singular.
diff --git a/src/cgs.jl b/src/cgs.jl
index 1ef4ddc0c..e67d96f73 100644
--- a/src/cgs.jl
+++ b/src/cgs.jl
@@ -8,7 +8,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, October 2018.
 
-export cgs
+export cgs, cgs!
 
 """
     (x, stats) = cgs(A, b::AbstractVector{T}; c::AbstractVector{T}=b,
@@ -38,9 +38,14 @@ This implementation allows a left preconditioner M and a right preconditioner N.
 
 * P. Sonneveld, *CGS, A Fast Lanczos-Type Solver for Nonsymmetric Linear systems*, SIAM Journal on Scientific and Statistical Computing, 10(1), pp. 36--52, 1989.
 """
-function cgs(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
-             M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
-             itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function cgs(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CgsSolver(A, b)
+  cgs!(solver, A, b; kwargs...)
+end
+
+function cgs!(solver :: CgsSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
+              M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
+              itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   m == n || error("System must be square")
@@ -49,15 +54,16 @@ function cgs(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
   isa(N, opEye) || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, r, u, p, q = solver.x, solver.r, solver.u, solver.p, solver.q
 
-  # Initial solution x₀ and residual r₀.
-  x = kzeros(S, n)  # x₀
-  r = copy(M * b)   # r₀
+  x .= zero(T)  # x₀
+  r .= M * b    # r₀
 
   # Compute residual norm ‖r₀‖₂.
   rNorm = @knrm2(n, r)
@@ -75,10 +81,9 @@ function cgs(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
   (verbose > 0) && @printf("%5s  %7s\n", "k", "‖rₖ‖")
   display(iter, verbose) && @printf("%5d  %7.1e\n", iter, rNorm)
 
-  # Set up workspace.
-  u = copy(r)       # u₀
-  p = copy(r)       # p₀
-  q = kzeros(S, n)  # q₋₁
+  u .= r        # u₀
+  p .= r        # p₀
+  q .= zero(T)  # q₋₁
 
   # Stopping criterion.
   solved = rNorm ≤ ε
diff --git a/src/cr.jl b/src/cr.jl
index 4a18756ca..6d45353a4 100644
--- a/src/cr.jl
+++ b/src/cr.jl
@@ -9,7 +9,7 @@
 # Marie-Ange Dahito, <marie-ange.dahito@polymtl.ca>
 # Montreal, QC, June 2017
 
-export cr
+export cr, cr!
 
 """
     (x, stats) = cr(A, b::AbstractVector{T};
@@ -32,9 +32,14 @@ with `n = length(b)`.
 * M. R. Hestenes and E. Stiefel, *Methods of conjugate gradients for solving linear systems*, Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952.
 * M-A. Dahito and D. Orban, *The Conjugate Residual Method in Linesearch and Trust-Region Methods*, SIAM Journal on Optimization, 29(3), pp. 1988--2025, 2019.
 """
-function cr(A, b :: AbstractVector{T};
-            M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0,
-            radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false) where T <: AbstractFloat
+function cr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CrSolver(A, b)
+  cr!(solver, A, b; kwargs...)
+end
+
+function cr!(solver :: CrSolver{T,S}, A, b :: AbstractVector{T};
+             M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0,
+             radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false) where {S, T <: AbstractFloat}
 
   linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0")
   n = size(b, 1) # size of the problem
@@ -43,20 +48,21 @@ function cr(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace 
+  x, r, p, q = solver.x, solver.r, solver.p, solver.q
 
   # Initial state.
-  x = kzeros(S, n)  # initial estimation x = 0
+  x .= zero(T)  # initial estimation x = 0
   xNorm = zero(T)
-  r = copy(M * b)  # initial residual r = M * (b - Ax) = M * b
+  r .= M * b  # initial residual r = M * (b - Ax) = M * b
   Ar = A * r
   ρ = @kdot(n, r, Ar)
   ρ == 0 && return (x, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution"))
-  p = copy(r)
-  q = copy(Ar)
+  p .= r
+  q .= Ar
   (verbose > 0) && (m = zero(T)) # quadratic model
 
   iter = 0
diff --git a/src/craig.jl b/src/craig.jl
index 9893ffffc..0d1f2160b 100644
--- a/src/craig.jl
+++ b/src/craig.jl
@@ -30,7 +30,7 @@
 #
 # This implementation is strongly inspired from Mike Saunders's.
 
-export craig
+export craig, craig!
 
 
 """
@@ -76,10 +76,15 @@ In this implementation, both the x and y-parts of the solution are returned.
 * C. C. Paige and M. A. Saunders, *LSQR: An Algorithm for Sparse Linear Equations and Sparse Least Squares*, ACM Transactions on Mathematical Software, 8(1), pp. 43--71, 1982.
 * M. A. Saunders, *Solutions of Sparse Rectangular Systems Using LSQR and CRAIG*, BIT Numerical Mathematics, 35(4), pp. 588--604, 1995.
 """
-function craig(A, b :: AbstractVector{T};
-               M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
-               btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0,
-               verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false) where T <: AbstractFloat
+function craig(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CraigSolver(A, b)
+  craig!(solver, A, b; kwargs...)
+end
+
+function craig!(solver :: CraigSolver{T,S}, A, b :: AbstractVector{T};
+                M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
+                btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0,
+                verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -91,22 +96,23 @@ function craig(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, Nv, y, w, Mu = solver.x, solver.Nv, solver.y, solver.w, solver.Mu
 
-  x = kzeros(S, n)
-  y = kzeros(S, m)
+  x .= zero(T)
+  y .= zero(T)
 
   # When solving a SQD system, set regularization parameter λ = 1.
   sqd && (λ = one(T))
 
-  Mu = copy(b)
+  Mu .= b
   u = M * Mu
   β₁ = sqrt(@kdot(m, u, Mu))
   β₁ == 0 && return x, y, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution")
@@ -122,8 +128,8 @@ function craig(A, b :: AbstractVector{T};
   @kscal!(m, one(T)/β₁, u)
   MisI || @kscal!(m, one(T)/β₁, Mu)
 
-  Nv = kzeros(S, n)
-  w = kzeros(S, m)  # Used to update y.
+  Nv .= zero(T)
+  w .= zero(T)  # Used to update y.
 
   λ > 0 && (w2 = kzeros(S, n))
 
diff --git a/src/craigmr.jl b/src/craigmr.jl
index d469ce76b..7b266bb82 100644
--- a/src/craigmr.jl
+++ b/src/craigmr.jl
@@ -24,7 +24,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Montreal, QC, May 2015.
 
-export craigmr
+export craigmr, craigmr!
 
 
 """
@@ -69,9 +69,14 @@ returned.
 * D. Orban and M. Arioli. *Iterative Solution of Symmetric Quasi-Definite Linear Systems*, Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017.
 * D. Orban, *The Projected Golub-Kahan Process for Constrained, Linear Least-Squares Problems*. Cahier du GERAD G-2014-15, 2014.
 """
-function craigmr(A, b :: AbstractVector{T};
-                 M=opEye(), N=opEye(), λ :: T=zero(T), atol :: T=√eps(T),
-                 rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function craigmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CraigmrSolver(A, b)
+  craigmr!(solver, A, b; kwargs...)
+end
+
+function craigmr!(solver :: CraigmrSolver{T,S}, A, b :: AbstractVector{T};
+                  M=opEye(), N=opEye(), λ :: T=zero(T), atol :: T=√eps(T),
+                  rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -83,19 +88,20 @@ function craigmr(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, Nv, y, Mu, w, wbar = solver.x, solver.Nv, solver.y, solver.Mu, solver.w, solver.wbar
 
   # Compute y such that AAᵀy = b. Then recover x = Aᵀy.
-  x = kzeros(S, n)
-  y = kzeros(S, m)
-  Mu = copy(b)
+  x .= zero(T)
+  y .= zero(T)
+  Mu .= b
   u = M * Mu
   β = sqrt(@kdot(m, u, Mu))
   β == 0 && return (x, y, SimpleStats(true, false, [zero(T)], T[], "x = 0 is a zero-residual solution"))
@@ -106,7 +112,7 @@ function craigmr(A, b :: AbstractVector{T};
   MisI || @kscal!(m, one(T)/β, Mu)
   # α₁Nv₁ = Aᵀu₁.
   Aᵀu = Aᵀ * u
-  Nv = copy(Aᵀu)
+  Nv .= Aᵀu
   v = N * Nv
   α = sqrt(@kdot(n, v, Nv))
   Anorm² = α * α
@@ -134,9 +140,9 @@ function craigmr(A, b :: AbstractVector{T};
   ɛ_c = atol + rtol * rNorm  # Stopping tolerance for consistent systems.
   ɛ_i = atol + rtol * ArNorm  # Stopping tolerance for inconsistent systems.
 
-  wbar = copy(u)
+  wbar .= u
   @kscal!(m, one(T)/α, wbar)
-  w = kzeros(S, m)
+  w .= zero(T)
 
   status = "unknown"
   solved = rNorm ≤ ɛ_c
diff --git a/src/crls.jl b/src/crls.jl
index b2276b6df..1eff95312 100644
--- a/src/crls.jl
+++ b/src/crls.jl
@@ -18,7 +18,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Princeton, NJ, March 2015.
 
-export crls
+export crls, crls!
 
 
 """
@@ -45,9 +45,14 @@ but simpler to implement.
 
 * D. C.-L. Fong, *Minimum-Residual Methods for Sparse, Least-Squares using Golubg-Kahan Bidiagonalization*, Ph.D. Thesis, Stanford University, 2011.
 """
-function crls(A, b :: AbstractVector{T};
-              M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
-              radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function crls(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CrlsSolver(A, b)
+  crls!(solver, A, b; kwargs...)
+end
+
+function crls!(solver :: CrlsSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+               radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -55,26 +60,27 @@ function crls(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, p, Ar, r, Ap = solver.x, solver.p, solver.Ar, solver.r, solver.Ap
 
-  x = kzeros(S, n)
-  r  = copy(b)
+  x .= zero(T)
+  r .= b
   bNorm = @knrm2(m, r)  # norm(b - A * x0) if x0 ≠ 0.
   bNorm == 0 && return x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution")
 
   Mr = M * r
-  Ar = copy(Aᵀ * Mr)  # - λ * x0 if x0 ≠ 0.
+  Ar .= Aᵀ * Mr  # - λ * x0 if x0 ≠ 0.
   s  = A * Ar
   Ms = M * s
 
-  p  = copy(Ar)
-  Ap = copy(s)
+  p  .= Ar
+  Ap .= s
   q  = Aᵀ * Ms # Ap
   λ > 0 && @kaxpy!(n, λ, p, q)  # q = q + λ * p
   γ  = @kdot(m, s, Ms)  # Faster than γ = dot(s, Ms)
diff --git a/src/crmr.jl b/src/crmr.jl
index 441bed28d..fc469de05 100644
--- a/src/crmr.jl
+++ b/src/crmr.jl
@@ -24,7 +24,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Montreal, QC, April 2015.
 
-export crmr
+export crmr, crmr!
 
 
 """
@@ -61,9 +61,14 @@ A preconditioner M may be provided in the form of a linear operator.
 * D. Orban and M. Arioli, *Iterative Solution of Symmetric Quasi-Definite Linear Systems*, Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017.
 * D. Orban, *The Projected Golub-Kahan Process for Constrained Linear Least-Squares Problems*. Cahier du GERAD G-2014-15, 2014.
 """
-function crmr(A, b :: AbstractVector{T};
-              M=opEye(), λ :: T=zero(T), atol :: T=√eps(T),
-              rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function crmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = CrmrSolver(A, b)
+  crmr!(solver, A, b; kwargs...)
+end
+
+function crmr!(solver :: CrmrSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), λ :: T=zero(T), atol :: T=√eps(T),
+               rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -71,23 +76,24 @@ function crmr(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, p, r = solver.x, solver.p, solver.r
 
-  x = kzeros(S, n)  # initial estimation x = 0
-  r = copy(M * b)   # initial residual r = M * (b - Ax) = M * b
+  x .= zero(T)  # initial estimation x = 0
+  r .= M * b    # initial residual r = M * (b - Ax) = M * b
   bNorm = @knrm2(m, r)  # norm(b - A * x0) if x0 ≠ 0.
   bNorm == 0 && return x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution")
   rNorm = bNorm  # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
   λ > 0 && (s = copy(r))
   Aᵀr = Aᵀ * r # - λ * x0 if x0 ≠ 0.
-  p  = copy(Aᵀr)
-  γ  = @kdot(n, Aᵀr, Aᵀr)  # Faster than γ = dot(Aᵀr, Aᵀr)
+  p .= Aᵀr
+  γ = @kdot(n, Aᵀr, Aᵀr)  # Faster than γ = dot(Aᵀr, Aᵀr)
   λ > 0 && (γ += λ * rNorm * rNorm)
   iter = 0
   itmax == 0 && (itmax = m + n)
diff --git a/src/diom.jl b/src/diom.jl
index 272c59f0f..ee3167706 100644
--- a/src/diom.jl
+++ b/src/diom.jl
@@ -8,7 +8,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, September 2018.
 
-export diom
+export diom, diom!
 
 """
     (x, stats) = diom(A, b::AbstractVector{T};
@@ -31,9 +31,14 @@ This implementation allows a left preconditioner M and a right preconditioner N.
 
 * Y. Saad, *Practical use of some krylov subspace methods for solving indefinite and nonsymmetric linear systems*, SIAM journal on scientific and statistical computing, 5(1), pp. 203--228, 1984.
 """
-function diom(A, b :: AbstractVector{T};
-              M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
-              memory :: Int=20, pivoting :: Bool=false, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function diom(A, b :: AbstractVector{T}; memory :: Int=20, kwargs...) where T <: AbstractFloat
+  solver = DiomSolver(A, b, memory=memory)
+  diom!(solver, A, b; kwargs...)
+end
+
+function diom!(solver :: DiomSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+               pivoting :: Bool=false, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   m == n || error("System must be square")
@@ -42,16 +47,17 @@ function diom(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
   isa(N, opEye) || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, x_old, P, V, L, H, p = solver.x, solver.x_old, solver.P, solver.V, solver.L, solver.H, solver.p
 
   # Initial solution x₀ and residual r₀.
-  x = kzeros(S, n)  # x₀
-  x_old = copy(x)
-  r₀ = M * b      # M⁻¹(b - Ax₀)
+  x .= zero(T)  # x₀
+  x_old .= x
+  r₀ = M * b  # M⁻¹(b - Ax₀)
   # Compute β.
   rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
   rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution")
@@ -64,16 +70,17 @@ function diom(A, b :: AbstractVector{T};
   (verbose > 0) && @printf("%5s  %7s\n", "k", "‖rₖ‖")
   display(iter, verbose) && @printf("%5d  %7.1e\n", iter, rNorm)
 
-  # Set up workspace.
-  mem = min(memory, itmax) # Memory.
-  V = [kzeros(S, n) for i = 1 : mem]  # Preconditioned Krylov vectors, orthogonal basis for {b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}.
-  P = [kzeros(S, n) for i = 1 : mem]  # Directions for x : Pₘ = Vₘ(Uₘ)⁻¹.
-  H = zeros(T, mem+2)                 # Last column of the band hessenberg matrix Hₘ = LₘUₘ.
+  mem = length(L)  # Memory
+  for i = 1 : mem
+    V[i] .= zero(T)  # Preconditioned Krylov vectors, orthogonal basis for {M⁻¹b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}.
+    P[i] .= zero(T)  # Directions for x : Pₘ = Vₘ(Uₘ)⁻¹.
+  end
+  H .= zero(T)  # Last column of the band hessenberg matrix Hₘ = LₘUₘ.
   # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
   # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
   # In addition of that, the last column of Uₘ is stored in H.
-  L = zeros(T, mem)        # Last mem Pivots of Lₘ.
-  p = BitArray(undef, mem) # Last mem permutations.
+  L .= zero(T)  # Last mem Pivots of Lₘ.
+  p .= false    # Last mem permutations.
 
   # Initial ξ₁ and V₁.
   ξ = rNorm
diff --git a/src/dqgmres.jl b/src/dqgmres.jl
index 635f5d43e..c32de22ce 100644
--- a/src/dqgmres.jl
+++ b/src/dqgmres.jl
@@ -8,7 +8,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, August 2018.
 
-export dqgmres
+export dqgmres, dqgmres!
 
 """
     (x, stats) = dqgmres(A, b::AbstractVector{T};
@@ -29,9 +29,14 @@ This implementation allows a left preconditioner M and a right preconditioner N.
 
 * Y. Saad and K. Wu, *DQGMRES: a quasi minimal residual algorithm based on incomplete orthogonalization*, Numerical Linear Algebra with Applications, Vol. 3(4), pp. 329--343, 1996.
 """
-function dqgmres(A, b :: AbstractVector{T};
-                 M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
-                 itmax :: Int=0, memory :: Int=20, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function dqgmres(A, b :: AbstractVector{T}; memory :: Int=20, kwargs...) where T <: AbstractFloat
+  solver = DqgmresSolver(A, b, memory=memory)
+  dqgmres!(solver, A, b; kwargs...)
+end
+
+function dqgmres!(solver :: DqgmresSolver{T,S}, A, b :: AbstractVector{T};
+                  M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
+                  itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   m == n || error("System must be square")
@@ -40,15 +45,16 @@ function dqgmres(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
   isa(N, opEye) || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, P, V, c, s, H = solver.x, solver.P, solver.V, solver.c, solver.s, solver.H
 
   # Initial solution x₀ and residual r₀.
-  x = kzeros(S, n)  # x₀
-  r₀ = M * b        # M⁻¹(b - Ax₀)
+  x .= zero(T)  # x₀
+  r₀ = M * b    # M⁻¹(b - Ax₀)
   # Compute β
   rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
   rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution")
@@ -62,12 +68,14 @@ function dqgmres(A, b :: AbstractVector{T};
   display(iter, verbose) && @printf("%5d  %7.1e\n", iter, rNorm)
 
   # Set up workspace.
-  mem = min(memory, itmax) # Memory.
-  V = [kzeros(S, n) for i = 1 : mem]  # Preconditioned Krylov vectors, orthogonal basis for {b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}.
-  P = [kzeros(S, n) for i = 1 : mem]  # Directions for x : Pₘ = Vₘ(Rₘ)⁻¹.
-  s = zeros(T, mem)                   # Last mem Givens sines used for the factorization QₘRₘ = Hₘ.
-  c = zeros(T, mem)                   # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ.
-  H = zeros(T, mem+2)                 # Last column of the band hessenberg matrix Hₘ.
+  mem = length(c)  # Memory.
+  for i = 1 : mem
+    V[i] .= zero(T)  # Preconditioned Krylov vectors, orthogonal basis for {M⁻¹b, M⁻¹AN⁻¹b, (M⁻¹AN⁻¹)²b, ..., (M⁻¹AN⁻¹)ᵐ⁻¹b}.
+    P[i] .= zero(T)  # Directions for x : Pₘ = Vₘ(Rₘ)⁻¹.
+  end
+  s .= zero(T)  # Last mem Givens sines used for the factorization QₘRₘ = Hₘ.
+  c .= zero(T)  # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ.
+  H .= zero(T)  # Last column of the band hessenberg matrix Hₘ.
   # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
   # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
   # In addition of that, the last column of Rₘ is also stored in H.
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index 54a8b7894..a7af49cfb 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -1,18 +1,22 @@
-export KrylovSolver, MinresSolver
+export KrylovSolver, MinresSolver, CgSolver, CrSolver, SymmlqSolver, CgLanczosSolver,
+CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver,
+UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver,
+BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver,
+LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver
 
 "Abstract type for using Krylov solvers in-place"
-abstract type KrylovSolver{T, S} end
+abstract type KrylovSolver{T,S} end
 
 """
 Type for storing the vectors required by the in-place version of MINRES.
 
-The outer constructor 
+The outer constructor
 
-    solver = MinresSolver(A, b :: AbstractVector{T}; window :: Int=5) where T <: AbstractFloat
+    solver = MinresSolver(A, b; window :: Int=5)
 
 may be used in order to create these vectors.
 """
-mutable struct MinresSolver{T, S} <: KrylovSolver{T, S}
+mutable struct MinresSolver{T,S} <: KrylovSolver{T,S}
   x       :: S
   r1      :: S
   r2      :: S
@@ -20,16 +24,912 @@ mutable struct MinresSolver{T, S} <: KrylovSolver{T, S}
   w2      :: S
   err_vec :: Vector{T}
   stats   :: SimpleStats{T}
+
+  function MinresSolver(A, b; window :: Int=5)
+    m, n = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x  = S(undef, n)
+    r1 = S(undef, n)
+    r2 = S(undef, n)
+    w1 = S(undef, n)
+    w2 = S(undef, n)
+    err_vec = zeros(T, window)
+    stats = SimpleStats(false, true, T[], T[], "unknown")
+    solver = new{T,S}(x, r1, r2, w1, w2, err_vec, stats)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CG.
+
+The outer constructor
+
+    solver = CgSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CgSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  r :: S
+  p :: S
+
+  function CgSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, n)
+    r = S(undef, n)
+    p = S(undef, n)
+    solver = new{T,S}(x, r, p)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CR.
+
+The outer constructor
+
+    solver = CrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CrSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  r :: S
+  p :: S
+  q :: S
+
+  function CrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, n)
+    r = S(undef, n)
+    p = S(undef, n)
+    q = S(undef, n)
+    solver = new{T,S}(x, r, p, q)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of SYMMLQ.
+
+The outer constructor
+
+    solver = SymmlqSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct SymmlqSolver{T,S} <: KrylovSolver{T,S}
+  x     :: S
+  Mvold :: S
+  Mv    :: S
+  w̅     :: S
+
+  function SymmlqSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x     = S(undef, n)
+    Mvold = S(undef, n)
+    Mv    = S(undef, n)
+    w̅     = S(undef, n)
+    solver = new{T,S}(x, Mvold, Mv, w̅)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CG-LANCZOS.
+
+The outer constructor
+
+    solver = CgLanczosSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CgLanczosSolver{T,S} <: KrylovSolver{T,S}
+  x       :: S
+  Mv      :: S
+  Mv_prev :: S
+  p       :: S
+
+  function CgLanczosSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x       = S(undef, n)
+    Mv      = S(undef, n)
+    Mv_prev = S(undef, n)
+    p       = S(undef, n)
+    solver = new{T,S}(x, Mv, Mv_prev, p)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIFT-SEQ.
+
+The outer constructor
+
+    solver = CgLanczosShiftSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CgLanczosShiftSolver{T,S} <: KrylovSolver{T,S}
+  Mv         :: S
+  Mv_prev    :: S
+  x          :: Vector{S}
+  p          :: Vector{S}
+  σ          :: Vector{T}
+  δhat       :: Vector{T}
+  ω          :: Vector{T}
+  γ          :: Vector{T}
+  rNorms     :: Vector{T}
+  indefinite :: BitArray
+  converged  :: BitArray
+  not_cv     :: BitArray
+
+  function CgLanczosShiftSolver(A, b, shifts)
+    n, m = size(A)
+    nshifts = length(shifts)
+    S = ktypeof(b)
+    T = eltype(b)
+    Mv         = S(undef, n)
+    Mv_prev    = S(undef, n)
+    x          = [S(undef, n) for i = 1 : nshifts]
+    p          = [S(undef, n) for i = 1 : nshifts]
+    σ          = Vector{T}(undef, nshifts)
+    δhat       = Vector{T}(undef, nshifts)
+    ω          = Vector{T}(undef, nshifts)
+    γ          = Vector{T}(undef, nshifts)
+    rNorms     = Vector{T}(undef, nshifts)
+    indefinite = BitArray(undef, nshifts)
+    converged  = BitArray(undef, nshifts)
+    not_cv     = BitArray(undef, nshifts)
+    solver = new{T,S}(Mv, Mv_prev, x, p, σ, δhat, ω, γ, rNorms, indefinite, converged, not_cv)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of MINRES-QLP.
+
+The outer constructor
+
+    solver = MinresQlpSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct MinresQlpSolver{T,S} <: KrylovSolver{T,S}
+  wₖ₋₁    :: S
+  wₖ      :: S
+  M⁻¹vₖ₋₁ :: S
+  M⁻¹vₖ   :: S
+  x       :: S
+
+  function MinresQlpSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    wₖ₋₁    = S(undef, n)
+    wₖ      = S(undef, n)
+    M⁻¹vₖ₋₁ = S(undef, n)
+    M⁻¹vₖ   = S(undef, n)
+    x       = S(undef, n)
+    solver = new{T,S}(wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x)
+    return solver
+  end
 end
 
-function MinresSolver(A, b :: AbstractVector{T}; window :: Int=5) where T <: AbstractFloat
-  m, n = size(A)
-  x = similar(b, n)
-  r1 = similar(b, n)
-  r2 = similar(b, n)
-  w1 = similar(b, n)
-  w2 = similar(b, n)
-  err_vec = zeros(T, window)
-  stats = SimpleStats(false, true, T[], T[], "unknown")
-  return MinresSolver(x, r1, r2, w1, w2, err_vec, stats)
+"""
+Type for storing the vectors required by the in-place version of DQGMRES.
+
+The outer constructor
+
+    solver = DqgmresSolver(A, b; memory :: Integer=20)
+
+may be used in order to create these vectors.
+"""
+mutable struct DqgmresSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  P :: Vector{S}
+  V :: Vector{S}
+  c :: Vector{T}
+  s :: Vector{T}
+  H :: Vector{T}
+
+  function DqgmresSolver(A, b; memory :: Integer=20)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, n)
+    P = [S(undef, n) for i = 1 : memory]
+    V = [S(undef, n) for i = 1 : memory]
+    c = Vector{T}(undef, memory)
+    s = Vector{T}(undef, memory)
+    H = Vector{T}(undef, memory+2)
+    solver = new{T,S}(x, P, V, c, s, H)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of DIOM.
+
+The outer constructor
+
+    solver = DiomSolver(A, b; memory :: Integer=20)
+
+may be used in order to create these vectors.
+"""
+mutable struct DiomSolver{T,S} <: KrylovSolver{T,S}
+  x     :: S
+  x_old :: S
+  P     :: Vector{S}
+  V     :: Vector{S}
+  L     :: Vector{T}
+  H     :: Vector{T}
+  p     :: BitArray
+
+  function DiomSolver(A, b; memory :: Integer=20)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x     = S(undef, n)
+    x_old = S(undef, n)
+    P     = [S(undef, n) for i = 1 : memory]
+    V     = [S(undef, n) for i = 1 : memory]
+    L     = Vector{T}(undef, memory)
+    H     = Vector{T}(undef, memory+2)
+    p     = BitArray(undef, memory)
+    solver = new{T,S}(x, x_old, P, V, L, H, p)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of USYMLQ.
+
+The outer constructor
+
+    solver = UsymlqSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct UsymlqSolver{T,S} <: KrylovSolver{T,S}
+  uₖ₋₁ :: S
+  uₖ   :: S
+  x    :: S
+  d̅    :: S
+  vₖ₋₁ :: S
+  vₖ   :: S
+
+  function UsymlqSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    uₖ₋₁ = S(undef, m)
+    uₖ   = S(undef, m)
+    x    = S(undef, m)
+    d̅    = S(undef, m)
+    vₖ₋₁ = S(undef, n)
+    vₖ   = S(undef, n)
+    solver = new{T,S}(uₖ₋₁, uₖ, x, d̅, vₖ₋₁, vₖ)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of USYMQR.
+
+The outer constructor
+
+    solver = UsymqrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct UsymqrSolver{T,S} <: KrylovSolver{T,S}
+  vₖ₋₁ :: S
+  vₖ   :: S
+  x    :: S
+  wₖ₋₂ :: S
+  wₖ₋₁ :: S
+  uₖ₋₁ :: S
+  uₖ   :: S
+
+  function UsymqrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    vₖ₋₁ = S(undef, n)
+    vₖ   = S(undef, n)
+    x    = S(undef, m)
+    wₖ₋₂ = S(undef, m)
+    wₖ₋₁ = S(undef, m)
+    uₖ₋₁ = S(undef, m)
+    uₖ   = S(undef, m)
+    solver = new{T,S}(vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of TRICG.
+
+The outer constructor
+
+    solver = TricgSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct TricgSolver{T,S} <: KrylovSolver{T,S}
+  yₖ      :: S
+  N⁻¹uₖ₋₁ :: S
+  N⁻¹uₖ   :: S
+  gy₂ₖ₋₁  :: S
+  gy₂ₖ    :: S
+  xₖ      :: S
+  M⁻¹vₖ₋₁ :: S
+  M⁻¹vₖ   :: S
+  gx₂ₖ₋₁  :: S
+  gx₂ₖ    :: S
+
+  function TricgSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    yₖ      = S(undef, m)
+    N⁻¹uₖ₋₁ = S(undef, m)
+    N⁻¹uₖ   = S(undef, m)
+    gy₂ₖ₋₁  = S(undef, m)
+    gy₂ₖ    = S(undef, m)
+    xₖ      = S(undef, n)
+    M⁻¹vₖ₋₁ = S(undef, n)
+    M⁻¹vₖ   = S(undef, n)
+    gx₂ₖ₋₁  = S(undef, n)
+    gx₂ₖ    = S(undef, n)
+    solver  = new{T,S}(yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, gy₂ₖ₋₁, gy₂ₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, gx₂ₖ₋₁, gx₂ₖ)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of TRIMR.
+
+The outer constructor
+
+    solver = TrimrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct TrimrSolver{T,S} <: KrylovSolver{T,S}
+  yₖ      :: S
+  N⁻¹uₖ₋₁ :: S
+  N⁻¹uₖ   :: S
+  gy₂ₖ₋₃  :: S
+  gy₂ₖ₋₂  :: S
+  gy₂ₖ₋₁  :: S
+  gy₂ₖ    :: S
+  xₖ      :: S
+  M⁻¹vₖ₋₁ :: S
+  M⁻¹vₖ   :: S
+  gx₂ₖ₋₃  :: S
+  gx₂ₖ₋₂  :: S
+  gx₂ₖ₋₁  :: S
+  gx₂ₖ    :: S
+
+  function TrimrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    yₖ      = S(undef, m)
+    N⁻¹uₖ₋₁ = S(undef, m)
+    N⁻¹uₖ   = S(undef, m)
+    gy₂ₖ₋₃  = S(undef, m)
+    gy₂ₖ₋₂  = S(undef, m)
+    gy₂ₖ₋₁  = S(undef, m)
+    gy₂ₖ    = S(undef, m)
+    xₖ      = S(undef, n)
+    M⁻¹vₖ₋₁ = S(undef, n)
+    M⁻¹vₖ   = S(undef, n)
+    gx₂ₖ₋₃  = S(undef, n)
+    gx₂ₖ₋₂  = S(undef, n)
+    gx₂ₖ₋₁  = S(undef, n)
+    gx₂ₖ    = S(undef, n)
+    solver = new{T,S}(yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of TRILQR.
+
+The outer constructor
+
+    solver = TrilqrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct TrilqrSolver{T,S} <: KrylovSolver{T,S}
+  uₖ₋₁ :: S
+  uₖ   :: S
+  vₖ₋₁ :: S
+  vₖ   :: S
+  x    :: S
+  t    :: S
+  d̅    :: S
+  wₖ₋₃ :: S
+  wₖ₋₂ :: S
+
+  function TrilqrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    uₖ₋₁ = S(undef, m)
+    uₖ   = S(undef, m)
+    vₖ₋₁ = S(undef, n)
+    vₖ   = S(undef, n)
+    x    = S(undef, m)
+    t    = S(undef, n)
+    d̅    = S(undef, m)
+    wₖ₋₃ = S(undef, n)
+    wₖ₋₂ = S(undef, n)
+    solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CGS.
+
+The outer constructor
+
+    solver = CgsSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CgsSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  r :: S
+  u :: S
+  p :: S
+  q :: S
+
+  function CgsSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, n)
+    r = S(undef, n)
+    u = S(undef, n)
+    p = S(undef, n)
+    q = S(undef, n)
+    solver = new{T,S}(x, r, u, p, q)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of BICGSTAB.
+
+The outer constructor
+
+    solver = BicgstabSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct BicgstabSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  r :: S
+  p :: S
+  v :: S
+  s :: S
+
+  function BicgstabSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, n)
+    r = S(undef, n)
+    p = S(undef, n)
+    v = S(undef, n)
+    s = S(undef, n)
+    solver = new{T,S}(x, r, p, v, s)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of BILQ.
+
+The outer constructor
+
+    solver = BilqSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct BilqSolver{T,S} <: KrylovSolver{T,S}
+  uₖ₋₁ :: S
+  uₖ   :: S
+  vₖ₋₁ :: S
+  vₖ   :: S
+  x    :: S
+  d̅    :: S
+
+  function BilqSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    uₖ₋₁ = S(undef, n)
+    uₖ   = S(undef, n)
+    vₖ₋₁ = S(undef, n)
+    vₖ   = S(undef, n)
+    x    = S(undef, n)
+    d̅    = S(undef, n)
+    solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of QMR.
+
+The outer constructor
+
+    solver = QmrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct QmrSolver{T,S} <: KrylovSolver{T,S}
+  uₖ₋₁ :: S
+  uₖ   :: S
+  vₖ₋₁ :: S
+  vₖ   :: S
+  x    :: S
+  wₖ₋₂ :: S
+  wₖ₋₁ :: S
+
+  function QmrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    uₖ₋₁ = S(undef, n)
+    uₖ   = S(undef, n)
+    vₖ₋₁ = S(undef, n)
+    vₖ   = S(undef, n)
+    x    = S(undef, n)
+    wₖ₋₂ = S(undef, n)
+    wₖ₋₁ = S(undef, n)
+    solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of BILQR.
+
+The outer constructor
+
+    solver = BilqrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct BilqrSolver{T,S} <: KrylovSolver{T,S}
+  uₖ₋₁ :: S
+  uₖ   :: S
+  vₖ₋₁ :: S
+  vₖ   :: S
+  x    :: S
+  t    :: S
+  d̅    :: S
+  wₖ₋₃ :: S
+  wₖ₋₂ :: S
+
+  function BilqrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    uₖ₋₁ = S(undef, n)
+    uₖ   = S(undef, n)
+    vₖ₋₁ = S(undef, n)
+    vₖ   = S(undef, n)
+    x    = S(undef, n)
+    t    = S(undef, n)
+    d̅    = S(undef, n)
+    wₖ₋₃ = S(undef, n)
+    wₖ₋₂ = S(undef, n)
+    solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CGLS.
+
+The outer constructor
+
+    solver = CglsSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CglsSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  p :: S
+  r :: S
+
+  function CglsSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, m)
+    p = S(undef, m)
+    r = S(undef, n)
+    solver = new{T,S}(x, p, r)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CRLS.
+
+The outer constructor
+
+    solver = CrlsSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CrlsSolver{T,S} <: KrylovSolver{T,S}
+  x  :: S
+  p  :: S
+  Ar :: S
+  r  :: S
+  Ap :: S
+
+  function CrlsSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x  = S(undef, m)
+    p  = S(undef, m)
+    Ar = S(undef, m)
+    r  = S(undef, n)
+    Ap = S(undef, n)
+    solver = new{T,S}(x, p, Ar, r, Ap)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CGNE.
+
+The outer constructor
+
+    solver = CgneSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CgneSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  p :: S
+  r :: S
+
+  function CgneSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, m)
+    p = S(undef, m)
+    r = S(undef, n)
+    solver = new{T,S}(x, p, r)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CRMR.
+
+The outer constructor
+
+    solver = CrmrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CrmrSolver{T,S} <: KrylovSolver{T,S}
+  x :: S
+  p :: S
+  r :: S
+
+  function CrmrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x = S(undef, m)
+    p = S(undef, m)
+    r = S(undef, n)
+    solver = new{T,S}(x, p, r)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of LSLQ.
+
+The outer constructor
+
+    solver = LslqSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct LslqSolver{T,S} <: KrylovSolver{T,S}
+  x_lq :: S
+  Nv   :: S
+  w̄    :: S
+  Mu   :: S
+
+  function LslqSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x_lq = S(undef, m)
+    Nv   = S(undef, m)
+    w̄    = S(undef, m)
+    Mu   = S(undef, n)
+    solver = new{T,S}(x_lq, Nv, w̄, Mu)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of LSQR.
+
+The outer constructor
+
+    solver = LsqrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct LsqrSolver{T,S} <: KrylovSolver{T,S}
+  x  :: S
+  Nv :: S
+  w  :: S
+  Mu :: S
+
+  function LsqrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x  = S(undef, m)
+    Nv = S(undef, m)
+    w  = S(undef, m)
+    Mu = S(undef, n)
+    solver = new{T,S}(x, Nv, w, Mu)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of LSMR.
+
+The outer constructor
+
+    solver = LsmrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct LsmrSolver{T,S} <: KrylovSolver{T,S}
+  x    :: S
+  Nv   :: S
+  h    :: S
+  hbar :: S
+  Mu   :: S
+
+  function LsmrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x    = S(undef, m)
+    Nv   = S(undef, m)
+    h    = S(undef, m)
+    hbar = S(undef, m)
+    Mu   = S(undef, n)
+    solver = new{T,S}(x, Nv, h, hbar, Mu)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of LNLQ.
+
+The outer constructor
+
+    solver = LnlqSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct LnlqSolver{T,S} <: KrylovSolver{T,S}
+  x  :: S
+  Nv :: S
+  y  :: S
+  w̄  :: S
+  Mu :: S
+
+  function LnlqSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x  = S(undef, m)
+    Nv = S(undef, m)
+    y  = S(undef, n)
+    w̄  = S(undef, n)
+    Mu = S(undef, n)
+    solver = new{T,S}(x, Nv, y, w̄, Mu)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CRAIG.
+
+The outer constructor
+
+    solver = CraigSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CraigSolver{T,S} <: KrylovSolver{T,S}
+  x  :: S
+  Nv :: S
+  y  :: S
+  w  :: S
+  Mu :: S
+
+  function CraigSolver(A, b)
+    n, m = size(A)
+    S  = ktypeof(b)
+    T  = eltype(b)
+    x  = S(undef, m)
+    Nv = S(undef, m)
+    y  = S(undef, n)
+    w  = S(undef, n)
+    Mu = S(undef, n)
+    solver = new{T,S}(x, Nv, y, w, Mu)
+    return solver
+  end
+end
+
+"""
+Type for storing the vectors required by the in-place version of CRAIGMR.
+
+The outer constructor
+
+    solver = CraigmrSolver(A, b)
+
+may be used in order to create these vectors.
+"""
+mutable struct CraigmrSolver{T,S} <: KrylovSolver{T,S}
+  x    :: S
+  Nv   :: S
+  y    :: S
+  Mu   :: S
+  w    :: S
+  wbar :: S
+
+  function CraigmrSolver(A, b)
+    n, m = size(A)
+    S = ktypeof(b)
+    T = eltype(b)
+    x    = S(undef, m)
+    Nv   = S(undef, m)
+    y    = S(undef, n)
+    Mu   = S(undef, n)
+    w    = S(undef, n)
+    wbar = S(undef, n)
+    solver = new{T,S}(x, Nv, y, Mu, w, wbar)
+    return solver
+  end
 end
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index ab11c9ee3..6d0f6d39e 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -166,18 +166,35 @@ function to_boundary(x :: Vector{T}, d :: Vector{T},
   return roots # `σ1` and `σ2`
 end
 
+"""
+    S = ktypeof(v)
+
+Return a dense storage type `S` based on the type of `v`.
+"""
+function ktypeof(v :: AbstractVector)
+  S = typeof(v)
+  if S <: SubArray
+    S = S.types[1]  # SubArray
+  end
+  if S <: AbstractSparseVector
+    S = S <: SparseVector ? S.types[3] : S.types[2]  # SparseVector / CuSparseVector
+  end
+  return S
+end
+
 """
     v = kzeros(S, n)
 
 Create an AbstractVector of storage type `S` of length `n` only composed of zero.
 """
-@inline kzeros(S, n) = S <: SubArray ? fill!(S.types[1](undef, n), zero(eltype(S))) : fill!(S(undef, n), zero(eltype(S)))
+@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
+
 """
     v = kones(S, n)
 
 Create an AbstractVector of storage type `S` of length `n` only composed of one.
 """
-@inline kones(S, n) = S <: SubArray ? fill!(S.types[1](undef, n), one(eltype(S))) : fill!(similar(S, n), one(eltype(S)))
+@inline kones(S, n) = fill!(S(undef, n), one(eltype(S)))
 
 @inline display(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
 
diff --git a/src/lnlq.jl b/src/lnlq.jl
index ef1cfedd6..1fce5d8ce 100644
--- a/src/lnlq.jl
+++ b/src/lnlq.jl
@@ -22,7 +22,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montréal, March 2019 -- Alès, January 2020.
 
-export lnlq
+export lnlq, lnlq!
 
 """
     (x, y, stats) = lnlq(A, b::AbstractVector{T};
@@ -64,10 +64,15 @@ In this implementation, both the x and y-parts of the solution are returned.
 
 * R. Estrin, D. Orban, M.A. Saunders, *LNLQ: An Iterative Method for Least-Norm Problems with an Error Minimization Property*, SIAM Journal on Matrix Analysis and Applications, 40(3), pp. 1102--1124, 2019.
 """
-function lnlq(A, b :: AbstractVector{T};
-              M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T),
-              atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
-              transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function lnlq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = LnlqSolver(A, b)
+  lnlq!(solver, A, b; kwargs...)
+end
+
+function lnlq!(solver :: LnlqSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T),
+               atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+               transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   length(b) == m || error("Inconsistent problem size")
@@ -79,18 +84,19 @@ function lnlq(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, Nv, y, w̄, Mu = solver.x, solver.Nv, solver.y, solver.w̄, solver.Mu
 
   # Initial solutions (x₀, y₀) and residual norm ‖r₀‖.
-  x = kzeros(S, n)
-  y = kzeros(S, m)
+  x .= zero(T)
+  y .= zero(T)
 
   # When solving a SQD system, set regularization parameter λ = 1.
   sqd && (λ = one(T))
@@ -112,7 +118,7 @@ function lnlq(A, b :: AbstractVector{T};
 
   # Initialize generalized Golub-Kahan bidiagonalization.
   # β₁Mu₁ = b.
-  Mu = copy(b)
+  Mu .= b
   u  = M * Mu                 # u₁ = M⁻¹ * Mu₁
   βₖ = sqrt(@kdot(m, u, Mu))  # β₁ = ‖u₁‖_M
   if βₖ ≠ 0
@@ -122,7 +128,7 @@ function lnlq(A, b :: AbstractVector{T};
 
   # α₁Nv₁ = Aᵀu₁.
   Aᵀu = Aᵀ * u
-  Nv  = copy(Aᵀu)
+  Nv .= Aᵀu
   v   = N * Nv                 # v₁ = N⁻¹ * Nv₁
   αₖ  = sqrt(@kdot(n, v, Nv))  # α₁ = ‖v₁‖_N
   if αₖ ≠ 0
@@ -130,8 +136,7 @@ function lnlq(A, b :: AbstractVector{T};
     NisI || @kscal!(n, 1 / αₖ, Nv)
   end
 
-  # Set up workspace.
-  w̄ = copy(u)        # Direction w̄₁
+  w̄ .= u             # Direction w̄₁
   cₖ = sₖ = zero(T)  # Givens sines and cosines used for the LQ factorization of (Lₖ)ᵀ
   ζₖ₋₁ = zero(T)     # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ
   ηₖ = zero(T)       # Coefficient of M̅ₖ
diff --git a/src/lslq.jl b/src/lslq.jl
index ac3eedb0d..f7f888920 100644
--- a/src/lslq.jl
+++ b/src/lslq.jl
@@ -1,7 +1,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Montreal, QC, November 2016-January 2017.
 
-export lslq
+export lslq, lslq!
 
 
 """
@@ -103,11 +103,16 @@ The iterations stop as soon as one of the following conditions holds true:
 * R. Estrin, D. Orban and M. A. Saunders, *Euclidean-norm error bounds for SYMMLQ and CG*, SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019.
 * R. Estrin, D. Orban and M. A. Saunders, *LSLQ: An Iterative Method for Linear Least-Squares with an Error Minimization Property*, SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 254--275, 2019.
 """
-function lslq(A, b :: AbstractVector{T};
-              M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T),
-              atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T),
-              window :: Int=5, utol :: T=√eps(T), itmax :: Int=0,
-              σ :: T=zero(T), conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function lslq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = LslqSolver(A, b)
+  lslq!(solver, A, b; kwargs...)
+end
+
+function lslq!(solver :: LslqSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), N=opEye(), sqd :: Bool=false, λ :: T=zero(T),
+               atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T),
+               window :: Int=5, utol :: T=√eps(T), itmax :: Int=0,
+               σ :: T=zero(T), conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -119,28 +124,29 @@ function lslq(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x_lq, Nv, w̄, Mu = solver.x_lq, solver.Nv, solver.w̄, solver.Mu
 
   # If solving an SQD system, set regularization to 1.
   sqd && (λ = one(T))
   λ² = λ * λ
   ctol = conlim > 0 ? 1/conlim : zero(T)
 
-  x_lq = kzeros(S, n)   # LSLQ point
+  x_lq .= zero(T)  # LSLQ point
   err_lbnds = T[]
   err_ubnds_lq = T[]
   err_ubnds_cg = T[]
 
   # Initialize Golub-Kahan process.
   # β₁ M u₁ = b.
-  Mu = copy(b)
+  Mu .= b
   u = M * Mu
   β₁ = sqrt(@kdot(m, u, Mu))
   β₁ == 0 && return (x_lq, kzeros(S, n), err_lbnds, err_ubnds_lq, err_ubnds_cg,
@@ -150,7 +156,7 @@ function lslq(A, b :: AbstractVector{T};
   @kscal!(m, one(T)/β₁, u)
   MisI || @kscal!(m, one(T)/β₁, Mu)
   Aᵀu = Aᵀ * u
-  Nv = copy(Aᵀu)
+  Nv .= Aᵀu
   v = N * Nv
   α = sqrt(@kdot(n, v, Nv))  # = α₁
 
@@ -173,7 +179,7 @@ function lslq(A, b :: AbstractVector{T};
   xcgNorm  = zero(T)
   xcgNorm² = zero(T)
 
-  w̄ = copy(v) # w̄₁ = v₁
+  w̄ .= v  # w̄₁ = v₁
 
   err_lbnd = zero(T)
   err_vec = zeros(T, window)
@@ -301,7 +307,7 @@ function lslq(A, b :: AbstractVector{T};
 
     if σ > 0 && iter > 0
       err_ubnd_cg = sqrt(ζ̃ * ζ̃ - ζ̄  * ζ̄ )
-      push!(err_ubnds_cg, err_ubnd_cg)
+      history && push!(err_ubnds_cg, err_ubnd_cg)
       fwd_err_ubnd = err_ubnd_cg ≤ utol * sqrt(xcgNorm²)
     end
 
@@ -327,7 +333,7 @@ function lslq(A, b :: AbstractVector{T};
     err_vec[mod(iter, window) + 1] = ζ
     if iter ≥ window
       err_lbnd = norm(err_vec)
-      push!(err_lbnds, err_lbnd)
+      history && push!(err_lbnds, err_lbnd)
       fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm
     end
 
@@ -337,7 +343,7 @@ function lslq(A, b :: AbstractVector{T};
       ϵ̃ = -ω * c
       τ̃ = -τ * δ / ω
       ζ̃ = (τ̃ - ζ * η̃) / ϵ̃
-      push!(err_ubnds_lq, abs(ζ̃ ))
+      history && push!(err_ubnds_lq, abs(ζ̃ ))
     end
 
     # Stopping conditions that do not depend on user input.
diff --git a/src/lsmr.jl b/src/lsmr.jl
index f7f61b373..e918d8d28 100644
--- a/src/lsmr.jl
+++ b/src/lsmr.jl
@@ -22,7 +22,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Montreal, QC, May 2015.
 
-export lsmr
+export lsmr, lsmr!
 
 
 """
@@ -72,13 +72,18 @@ In this case, `N` can still be specified and indicates the weighted norm in whic
 
 * D. C.-L. Fong and M. A. Saunders, *LSMR: An Iterative Algorithm for Sparse*, Least Squares Problems, SIAM Journal on Scientific Computing, 33(5), pp. 2950--2971, 2011.
 """
-function lsmr(A, b :: AbstractVector{T};
-              M=opEye(), N=opEye(), sqd :: Bool=false,
-              λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T),
-              atol :: T=zero(T), rtol :: T=zero(T),
-              etol :: T=√eps(T), window :: Int=5,
-              itmax :: Int=0, conlim :: T=1/√eps(T),
-              radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function lsmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = LsmrSolver(A, b)
+  lsmr!(solver, A, b; kwargs...)
+end
+
+function lsmr!(solver :: LsmrSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), N=opEye(), sqd :: Bool=false,
+               λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T),
+               atol :: T=zero(T), rtol :: T=zero(T),
+               etol :: T=√eps(T), window :: Int=5,
+               itmax :: Int=0, conlim :: T=1/√eps(T),
+               radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -87,29 +92,30 @@ function lsmr(A, b :: AbstractVector{T};
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
-
   # Tests M == Iₙ and N == Iₘ
   MisI = isa(M, opEye)
   NisI = isa(N, opEye)
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
+  # Set up workspace.
+  x, Nv, h, hbar, Mu = solver.x, solver.Nv, solver.h, solver.hbar, solver.Mu
+
   # If solving an SQD system, set regularization to 1.
   sqd && (λ = one(T))
   ctol = conlim > 0 ? 1/conlim : zero(T)
-  x = kzeros(S, n)
+  x .= zero(T)
 
   # Initialize Golub-Kahan process.
   # β₁ M u₁ = b.
-  Mu = copy(b)
+  Mu .= b
   u = M * Mu
   β₁ = sqrt(@kdot(m, u, Mu))
   β₁ == 0 && return (x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution"))
@@ -118,7 +124,7 @@ function lsmr(A, b :: AbstractVector{T};
   @kscal!(m, one(T)/β₁, u)
   MisI || @kscal!(m, one(T)/β₁, Mu)
   Aᵀu = Aᵀ * u
-  Nv = copy(Aᵀu)
+  Nv .= Aᵀu
   v = N * Nv
   α = sqrt(@kdot(n, v, Nv))
 
@@ -165,8 +171,8 @@ function lsmr(A, b :: AbstractVector{T};
   @kscal!(n, one(T)/α, v)
   NisI || @kscal!(n, one(T)/α, Nv)
 
-  h = copy(v)
-  hbar = kzeros(S, n)
+  h .= v
+  hbar .= zero(T)
 
   status = "unknown"
   on_boundary = false
diff --git a/src/lsqr.jl b/src/lsqr.jl
index 27f3a5709..3bc6f8aff 100644
--- a/src/lsqr.jl
+++ b/src/lsqr.jl
@@ -22,7 +22,7 @@
 # Dominique Orban, <dominique.orban@gerad.ca>
 # Montreal, QC, May 2015.
 
-export lsqr
+export lsqr, lsqr!
 
 
 """
@@ -72,13 +72,18 @@ In this case, `N` can still be specified and indicates the weighted norm in whic
 
 * C. C. Paige and M. A. Saunders, *LSQR: An Algorithm for Sparse Linear Equations and Sparse Least Squares*, ACM Transactions on Mathematical Software, 8(1), pp. 43--71, 1982.
 """
-function lsqr(A, b :: AbstractVector{T};
-              M=opEye(), N=opEye(), sqd :: Bool=false,
-              λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T),
-              atol :: T=zero(T), rtol :: T=zero(T),
-              etol :: T=√eps(T), window :: Int=5,
-              itmax :: Int=0, conlim :: T=1/√eps(T),
-              radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function lsqr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = LsqrSolver(A, b)
+  lsqr!(solver, A, b; kwargs...)
+end
+
+function lsqr!(solver :: LsqrSolver{T,S}, A, b :: AbstractVector{T};
+               M=opEye(), N=opEye(), sqd :: Bool=false,
+               λ :: T=zero(T), axtol :: T=√eps(T), btol :: T=√eps(T),
+               atol :: T=zero(T), rtol :: T=zero(T),
+               etol :: T=√eps(T), window :: Int=5,
+               itmax :: Int=0, conlim :: T=1/√eps(T),
+               radius :: T=zero(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   size(b, 1) == m || error("Inconsistent problem size")
@@ -90,24 +95,25 @@ function lsqr(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  x, Nv, w, Mu = solver.x, solver.Nv, solver.w, solver.Mu
 
   # If solving an SQD system, set regularization to 1.
   sqd && (λ = one(T))
   λ² = λ * λ
   ctol = conlim > 0 ? 1/conlim : zero(T)
-  x = kzeros(S, n)
+  x .= zeros(T)
 
   # Initialize Golub-Kahan process.
   # β₁ M u₁ = b.
-  Mu = copy(b)
+  Mu .= b
   u = M * Mu
   β₁ = sqrt(@kdot(m, u, Mu))
   β₁ == 0 && return (x, SimpleStats(true, false, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution"))
@@ -116,7 +122,7 @@ function lsqr(A, b :: AbstractVector{T};
   @kscal!(m, one(T)/β₁, u)
   MisI || @kscal!(m, one(T)/β₁, Mu)
   Aᵀu = Aᵀ * u
-  Nv = copy(Aᵀu)
+  Nv .= Aᵀu
   v = N * Nv
   Anorm² = @kdot(n, v, Nv)
   Anorm = sqrt(Anorm²)
@@ -143,7 +149,7 @@ function lsqr(A, b :: AbstractVector{T};
   α == 0 && return (x, SimpleStats(true, false, [β₁], [zero(T)], "x = 0 is a minimum least-squares solution"))
   @kscal!(n, one(T)/α, v)
   NisI || @kscal!(n, one(T)/α, Nv)
-  w = copy(v)
+  w .= v
 
   # Initialize other constants.
   ϕbar = β₁
diff --git a/src/minres.jl b/src/minres.jl
index 74f3ec7d9..d9ebad751 100644
--- a/src/minres.jl
+++ b/src/minres.jl
@@ -72,7 +72,7 @@ function minres(A, b :: AbstractVector{T}; window :: Int=5, kwargs...) where T <
   minres!(solver, A, b; kwargs...)
 end
 
-function minres!(solver :: MinresSolver{T, S}, A, b :: AbstractVector{T};
+function minres!(solver :: MinresSolver{T,S}, A, b :: AbstractVector{T};
                  M=opEye(), λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100, 
                  ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T),
                  itmax :: Int=0, conlim :: T=1/√eps(T),
@@ -85,10 +85,12 @@ function minres!(solver :: MinresSolver{T, S}, A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   isa(M, opEye) || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
-  # get data from solver
+  # Set up workspace.
   x, r1, r2, w1, w2, err_vec, stats = solver.x, solver.r1, solver.r2, solver.w1, solver.w2, solver.err_vec, solver.stats
+
   window = length(err_vec)
   rNorms, ArNorms = stats.residuals, stats.Aresiduals
   !history && !isempty(rNorms) && (rNorms = T[])
diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl
index 2a29dfa7a..9f35268a1 100644
--- a/src/minres_qlp.jl
+++ b/src/minres_qlp.jl
@@ -14,7 +14,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, September 2019.
 
-export minres_qlp
+export minres_qlp, minres_qlp!
 
 """
     (x, stats) = minres_qlp(A, b::AbstractVector{T};
@@ -35,9 +35,14 @@ M also indicates the weighted norm in which residuals are measured.
 * S.-C. T. Choi, C. C. Paige and M. A. Saunders, *MINRES-QLP: A Krylov subspace method for indefinite or singular symmetric systems*, SIAM Journal on Scientific Computing, Vol. 33(4), pp. 1810--1836, 2011.
 * S.-C. T. Choi and M. A. Saunders, *Algorithm 937: MINRES-QLP for symmetric and Hermitian linear equations and least-squares problems*, ACM Transactions on Mathematical Software, 40(2), pp. 1--12, 2014.
 """
-function minres_qlp(A, b :: AbstractVector{T};
-                    M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), λ ::T=zero(T),
-                    itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function minres_qlp(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = MinresQlpSolver(A, b)
+  minres_qlp!(solver, A, b; kwargs...)
+end
+
+function minres_qlp!(solver :: MinresQlpSolver{T,S}, A, b :: AbstractVector{T};
+                     M=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T), λ ::T=zero(T),
+                     itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n, m = size(A)
   m == n || error("System must be square")
@@ -49,16 +54,17 @@ function minres_qlp(A, b :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x = solver.wₖ₋₁, solver.wₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ, solver.x
 
   # Initial solution x₀
-  x = kzeros(S, n)
+  x .= zero(T)
 
   # β₁v₁ = Mb
-  M⁻¹vₖ = copy(b)
+  M⁻¹vₖ .= b
   vₖ = M * M⁻¹vₖ
   βₖ = sqrt(@kdot(n, vₖ, M⁻¹vₖ))
   if βₖ ≠ 0
@@ -80,14 +86,14 @@ function minres_qlp(A, b :: AbstractVector{T};
   display(iter, verbose) && @printf("%5d  %7.1e  %7s\n", iter, rNorm, "✗ ✗ ✗ ✗")
 
   # Set up workspace.
-  M⁻¹vₖ₋₁ = kzeros(S, n)
+  M⁻¹vₖ₋₁ .= zero(T)
   ζbarₖ   = βₖ
   ξₖ₋₁    = zero(T)
   τₖ₋₂    = τₖ₋₁ = τₖ = zero(T)
   ψbarₖ₋₂ = zero(T)
   μbisₖ₋₂ = μbarₖ₋₁ = zero(T)
-  wₖ₋₁  = kzeros(S, n)
-  wₖ    = kzeros(S, n)
+  wₖ₋₁ .= zero(T)
+  wₖ   .= zero(T)
   cₖ₋₂  = cₖ₋₁ = cₖ = zero(T)  # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
   sₖ₋₂  = sₖ₋₁ = sₖ = zero(T)  # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
 
diff --git a/src/qmr.jl b/src/qmr.jl
index 7579591b9..7704d7a63 100644
--- a/src/qmr.jl
+++ b/src/qmr.jl
@@ -18,7 +18,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, May 2019.
 
-export qmr
+export qmr, qmr!
 
 """
     (x, stats) = qmr(A, b::AbstractVector{T}; c::AbstractVector{T}=b,
@@ -36,9 +36,14 @@ When A is symmetric and b = c, QMR is equivalent to MINRES.
 * R. W. Freund and N. M. Nachtigal, *An implementation of the QMR method based on coupled two-term recurrences*, SIAM Journal on Scientific Computing, Vol. 15(2), pp. 313--337, 1994.
 * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
 """
-function qmr(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
-             atol :: T=√eps(T), rtol :: T=√eps(T),
-             itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function qmr(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = QmrSolver(A, b)
+  qmr!(solver, A, b; kwargs...)
+end
+
+function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
+              atol :: T=√eps(T), rtol :: T=√eps(T),
+              itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   n, m = size(A)
   m == n || error("System must be square")
@@ -47,16 +52,18 @@ function qmr(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.wₖ₋₂, solver.wₖ₋₁
 
   # Initial solution x₀ and residual norm ‖r₀‖.
-  x = kzeros(S, n)
-  rNorm = @knrm2(n, b)  # rNorm = ‖r₀‖
+  x .= zero(T)
+  rNorm = @knrm2(n, b)  # ‖r₀‖
   rNorm == 0 && return (x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution"))
 
   iter = 0
@@ -71,17 +78,16 @@ function qmr(A, b :: AbstractVector{T}; c :: AbstractVector{T}=b,
   bᵗc = @kdot(n, b, c)  # ⟨b,c⟩
   bᵗc == 0 && return (x, SimpleStats(false, false, [rNorm], T[], "Breakdown bᵀc = 0"))
 
-  # Set up uorkspace.
   βₖ = √(abs(bᵗc))            # β₁γ₁ = bᵀc
   γₖ = bᵗc / βₖ               # β₁γ₁ = bᵀc
-  vₖ₋₁ = kzeros(S, n)         # v₀ = 0
-  uₖ₋₁ = kzeros(S, n)         # u₀ = 0
-  vₖ = b / βₖ                 # v₁ = b / β₁
-  uₖ = c / γₖ                 # u₁ = c / γ₁
+  vₖ₋₁ .= zero(T)             # v₀ = 0
+  uₖ₋₁ .= zero(T)             # u₀ = 0
+  vₖ .= b ./ βₖ               # v₁ = b / β₁
+  uₖ .= c ./ γₖ               # u₁ = c / γ₁
   cₖ₋₂ = cₖ₋₁ = cₖ = zero(T)  # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
   sₖ₋₂ = sₖ₋₁ = sₖ = zero(T)  # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
-  wₖ₋₂ = kzeros(S, n)         # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹
-  wₖ₋₁ = kzeros(S, n)         # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹
+  wₖ₋₂ .= zero(T)             # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹
+  wₖ₋₁ .= zero(T)             # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹
   ζbarₖ = βₖ                  # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
   τₖ = @kdot(n, vₖ, vₖ)       # τₖ is used for the residual norm estimate
 
diff --git a/src/symmlq.jl b/src/symmlq.jl
index d32cd5203..32682c3d9 100644
--- a/src/symmlq.jl
+++ b/src/symmlq.jl
@@ -9,7 +9,7 @@
 #
 # Ron Estrin, <ronestrin756@gmail.com>
 
-export symmlq
+export symmlq, symmlq!
 
 
 """
@@ -35,34 +35,40 @@ assumed to be symmetric and positive definite.
 
 * C. C. Paige and M. A. Saunders, *Solution of Sparse Indefinite Systems of Linear Equations*, SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975.
 """
-function symmlq(A, b :: AbstractVector{T};
-                M=opEye(), λ :: T=zero(T), transfer_to_cg :: Bool=true,
-                λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
-                etol :: T=√eps(T), window :: Int=0, itmax :: Int=0,
-                conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function symmlq(A, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = SymmlqSolver(A, b)
+  symmlq!(solver, A, b; kwargs...)
+end
+
+function symmlq!(solver :: SymmlqSolver{T,S}, A, b :: AbstractVector{T};
+                 M=opEye(), λ :: T=zero(T), transfer_to_cg :: Bool=true,
+                 λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+                 etol :: T=√eps(T), window :: Int=0, itmax :: Int=0,
+                 conlim :: T=1/√eps(T), verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   m == n || error("System must be square")
   size(b, 1) == m || error("Inconsistent problem size")
   (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n)
 
-  # Determine the storage type of b
-  S = typeof(b)
-
   # Test M == Iₘ
   MisI = isa(M, opEye)
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
 
+  # Set up workspace.
+  x, Mvold, Mv, w̅ = solver.x, solver.Mvold, solver.Mv, solver.w̅
+
   ϵM = eps(T)
-  x = kzeros(S, n)
+  x .= zero(T)
   ctol = conlim > 0 ? 1 / conlim : zero(T)
 
   # Initialize Lanczos process.
   # β₁ M v₁ = b.
-  Mvold = copy(b)
+  Mvold .= b
   vold = M * Mvold
   β₁ = @kdot(m, vold, Mvold)
   β₁ == 0 && return (x, SimpleStats(true, true, [zero(T)], [zero(T)], "x = 0 is a zero-residual solution"))
@@ -71,9 +77,9 @@ function symmlq(A, b :: AbstractVector{T};
   @kscal!(m, one(T) / β, vold)
   MisI || @kscal!(m, one(T) / β, Mvold)
 
-  w̅ = copy(vold)
+  w̅ .= vold
 
-  Mv = copy(A * vold)
+  Mv .= A * vold
   α = @kdot(m, vold, Mv) + λ
   @kaxpy!(m, -α, Mvold, Mv)  # Mv = Mv - α * Mvold
   v = M * Mv
diff --git a/src/tricg.jl b/src/tricg.jl
index 136bf1509..aeaf1f75a 100644
--- a/src/tricg.jl
+++ b/src/tricg.jl
@@ -9,7 +9,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montréal, April 2020.
 
-export tricg
+export tricg, tricg!
 
 """
     (x, y, stats) = tricg(A, b::AbstractVector{T}, c::AbstractVector{T};
@@ -34,7 +34,7 @@ If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite
 TriCG is based on the preconditioned orthogonal tridiagonalization process
 and its relation with the preconditioned block-Lanczos process.
 
-    [ M   O ]
+    [ M   0 ]
     [ 0   N ]
 
 indicates the weighted norm in which residuals are measured.
@@ -50,10 +50,15 @@ Information will be displayed every `verbose` iterations.
 
 * A. Montoison and D. Orban, *TriCG and TriMR: Two Iterative Methods for Symmetric Quasi-Definite Systems*, Cahier du GERAD G-2020-41, GERAD, Montréal, 2020.
 """
-function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T};
-               M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
-               spd :: Bool=false, snd :: Bool=false, flip :: Bool=false,
-               τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = TricgSolver(A, b)
+  tricg!(solver, A, b, c; kwargs...)
+end
+
+function tricg!(solver :: TricgSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T};
+                M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
+                spd :: Bool=false, snd :: Bool=false, flip :: Bool=false,
+                τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   length(b) == m || error("Inconsistent problem size")
@@ -71,28 +76,31 @@ function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.yₖ, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ,  solver.xₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ
+  gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₁, solver.gx₂ₖ
 
   # Initial solutions x₀ and y₀.
-  xₖ = kzeros(S, m)
-  yₖ = kzeros(S, n)
+  xₖ .= zero(T)
+  yₖ .= zero(T)
 
   iter = 0
   itmax == 0 && (itmax = m+n)
 
   # Initialize preconditioned orthogonal tridiagonalization process.
-  M⁻¹vₖ₋₁ = kzeros(S, m)  # v₀ = 0
-  N⁻¹uₖ₋₁ = kzeros(S, n)  # u₀ = 0
+  M⁻¹vₖ₋₁ .= zero(T)  # v₀ = 0
+  N⁻¹uₖ₋₁ .= zero(T)  # u₀ = 0
 
   # β₁Ev₁ = b ↔ β₁v₁ = Mb
-  M⁻¹vₖ = copy(b)
+  M⁻¹vₖ .= b
   vₖ = M * M⁻¹vₖ
   βₖ = sqrt(@kdot(m, vₖ, M⁻¹vₖ))  # β₁ = ‖v₁‖_E
   if βₖ ≠ 0
@@ -101,7 +109,7 @@ function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   end
 
   # γ₁Fu₁ = c ↔ γ₁u₁ = Nb
-  N⁻¹uₖ = copy(c)
+  N⁻¹uₖ .= c
   uₖ = N * N⁻¹uₖ
   γₖ = sqrt(@kdot(n, uₖ, N⁻¹uₖ))  # γ₁ = ‖u₁‖_F
   if γₖ ≠ 0
@@ -110,10 +118,10 @@ function tricg(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   end
 
   # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ
-  gx₂ₖ₋₁ = kzeros(S, m)
-  gy₂ₖ₋₁ = kzeros(S, n)
-  gx₂ₖ   = kzeros(S, m)
-  gy₂ₖ   = kzeros(S, n)
+  gx₂ₖ₋₁ .= zero(T)
+  gy₂ₖ₋₁ .= zero(T)
+  gx₂ₖ   .= zero(T)
+  gy₂ₖ   .= zero(T)
 
   # Compute ‖r₀‖² = (γ₁)² + (β₁)²
   rNorm = sqrt(γₖ^2 + βₖ^2)
diff --git a/src/trilqr.jl b/src/trilqr.jl
index fa74d7b6c..96a7c751b 100644
--- a/src/trilqr.jl
+++ b/src/trilqr.jl
@@ -10,7 +10,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, July 2019.
 
-export trilqr
+export trilqr, trilqr!
 
 """
     (x, t, stats) = trilqr(A, b::AbstractVector{T}, c::AbstractVector{T};
@@ -32,9 +32,14 @@ USYMCG point, when it exists. The transfer is based on the residual norm.
 
 * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
 """
-function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
-                atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
-                itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = TrilqrSolver(A, b)
+  trilqr!(solver, A, b, c; kwargs...)
+end
+
+function trilqr!(solver :: TrilqrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T};
+                 atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
+                 itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   length(b) == m || error("Inconsistent problem size")
@@ -44,19 +49,21 @@ function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₃, wₖ₋₂ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.t, solver.d̅, solver.wₖ₋₃, solver.wₖ₋₂
 
   # Initial solution x₀ and residual r₀ = b - Ax₀.
-  x = kzeros(S, n)      # x₀
+  x .= zero(T)          # x₀
   bNorm = @knrm2(m, b)  # rNorm = ‖r₀‖
 
   # Initial solution y₀ and residual s₀ = c - Aᵀy₀.
-  t = kzeros(S, m)      # t₀
+  t .= zero(T)          # t₀
   cNorm = @knrm2(n, c)  # sNorm = ‖s₀‖
 
   iter = 0
@@ -73,20 +80,20 @@ function trilqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   # Set up workspace.
   βₖ = @knrm2(m, b)          # β₁ = ‖v₁‖
   γₖ = @knrm2(n, c)          # γ₁ = ‖u₁‖
-  vₖ₋₁ = kzeros(S, m)        # v₀ = 0
-  uₖ₋₁ = kzeros(S, n)        # u₀ = 0
-  vₖ = b / βₖ                # v₁ = b / β₁
-  uₖ = c / γₖ                # u₁ = c / γ₁
+  vₖ₋₁ .= zero(T)            # v₀ = 0
+  uₖ₋₁ .= zero(T)            # u₀ = 0
+  vₖ .= b ./ βₖ              # v₁ = b / β₁
+  uₖ .= c ./ γₖ              # u₁ = c / γ₁
   cₖ₋₁ = cₖ = -one(T)        # Givens cosines used for the LQ factorization of Tₖ
   sₖ₋₁ = sₖ = zero(T)        # Givens sines used for the LQ factorization of Tₖ
-  d̅ = kzeros(S, n)           # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
+  d̅ .= zero(T)               # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
   ζₖ₋₁ = ζbarₖ = zero(T)     # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
   ζₖ₋₂ = ηₖ = zero(T)        # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
   δbarₖ₋₁ = δbarₖ = zero(T)  # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
   ψbarₖ₋₁ = ψₖ₋₁ = zero(T)   # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁
   ϵₖ₋₃ = λₖ₋₂ = zero(T)      # Components of Lₖ₋₁
-  wₖ₋₃ = kzeros(S, m)        # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ
-  wₖ₋₂ = kzeros(S, m)        # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ
+  wₖ₋₃ .= zero(T)            # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ
+  wₖ₋₂ .= zero(T)            # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ
 
   # Stopping criterion.
   inconsistent = false
diff --git a/src/trimr.jl b/src/trimr.jl
index dbcee7d41..5e02fa8eb 100644
--- a/src/trimr.jl
+++ b/src/trimr.jl
@@ -9,7 +9,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montréal, June 2020.
 
-export trimr
+export trimr, trimr!
 
 """
     (x, y, stats) = trimr(A, b::AbstractVector{T}, c::AbstractVector{T};
@@ -35,7 +35,7 @@ If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is
 TriMR is based on the preconditioned orthogonal tridiagonalization process
 and its relation with the preconditioned block-Lanczos process.
 
-    [ M   O ]
+    [ M   0 ]
     [ 0   N ]
 
 indicates the weighted norm in which residuals are measured.
@@ -51,10 +51,15 @@ Information will be displayed every `verbose` iterations.
 
 * A. Montoison and D. Orban, *TriCG and TriMR: Two Iterative Methods for Symmetric Quasi-Definite Systems*, Cahier du GERAD G-2020-41, GERAD, Montréal, 2020.
 """
-function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
-               M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
-               spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false,
-               τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = TrimrSolver(A, b)
+  trimr!(solver, A, b, c; kwargs...)
+end
+
+function trimr!(solver :: TrimrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T};
+                M=opEye(), N=opEye(), atol :: T=√eps(T), rtol :: T=√eps(T),
+                spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false,
+                τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   length(b) == m || error("Inconsistent problem size")
@@ -75,28 +80,31 @@ function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
   MisI || (eltype(M) == T) || error("eltype(M) ≠ $T")
   NisI || (eltype(N) == T) || error("eltype(N) ≠ $T")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  yₖ, N⁻¹uₖ₋₁, N⁻¹uₖ, xₖ, M⁻¹vₖ₋₁, M⁻¹vₖ = solver.yₖ, solver.N⁻¹uₖ₋₁, solver.N⁻¹uₖ,  solver.xₖ, solver.M⁻¹vₖ₋₁, solver.M⁻¹vₖ
+  gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ = solver.gy₂ₖ₋₃, solver.gy₂ₖ₋₂, solver.gy₂ₖ₋₁, solver.gy₂ₖ, solver.gx₂ₖ₋₃, solver.gx₂ₖ₋₂, solver.gx₂ₖ₋₁, solver.gx₂ₖ
 
   # Initial solutions x₀ and y₀.
-  xₖ = kzeros(S, m)
-  yₖ = kzeros(S, n)
+  xₖ .= zero(T)
+  yₖ .= zero(T)
 
   iter = 0
   itmax == 0 && (itmax = m+n)
 
   # Initialize preconditioned orthogonal tridiagonalization process.
-  M⁻¹vₖ₋₁ = kzeros(S, m)  # v₀ = 0
-  N⁻¹uₖ₋₁ = kzeros(S, n)  # u₀ = 0
+  M⁻¹vₖ₋₁ .= zero(T)  # v₀ = 0
+  N⁻¹uₖ₋₁ .= zero(T)  # u₀ = 0
 
   # β₁Ev₁ = b ↔ β₁v₁ = Mb
-  M⁻¹vₖ = copy(b)
+  M⁻¹vₖ .= b
   vₖ = M * M⁻¹vₖ
   βₖ = sqrt(@kdot(m, vₖ, M⁻¹vₖ))  # β₁ = ‖v₁‖_E
   if βₖ ≠ 0
@@ -105,7 +113,7 @@ function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   end
 
   # γ₁Fu₁ = c ↔ γ₁u₁ = Nb
-  N⁻¹uₖ = copy(c)
+  N⁻¹uₖ .= c
   uₖ = N * N⁻¹uₖ
   γₖ = sqrt(@kdot(n, uₖ, N⁻¹uₖ))  # γ₁ = ‖u₁‖_F
   if γₖ ≠ 0
@@ -114,14 +122,14 @@ function trimr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   end
 
   # Initialize directions Gₖ such that (GₖRₖ)ᵀ = (Wₖ)ᵀ.
-  gx₂ₖ₋₃ = kzeros(S, m)
-  gy₂ₖ₋₃ = kzeros(S, n)
-  gx₂ₖ₋₂ = kzeros(S, m)
-  gy₂ₖ₋₂ = kzeros(S, n)
-  gx₂ₖ₋₁ = kzeros(S, m)
-  gy₂ₖ₋₁ = kzeros(S, n)
-  gx₂ₖ   = kzeros(S, m)
-  gy₂ₖ   = kzeros(S, n)
+  gx₂ₖ₋₃ .= zero(T)
+  gy₂ₖ₋₃ .= zero(T)
+  gx₂ₖ₋₂ .= zero(T)
+  gy₂ₖ₋₂ .= zero(T)
+  gx₂ₖ₋₁ .= zero(T)
+  gy₂ₖ₋₁ .= zero(T)
+  gx₂ₖ   .= zero(T)
+  gy₂ₖ   .= zero(T)
 
   # Compute ‖r₀‖² = (γ₁)² + (β₁)²
   rNorm = sqrt(γₖ^2 + βₖ^2)
diff --git a/src/usymlq.jl b/src/usymlq.jl
index b6b521088..1db2ca35f 100644
--- a/src/usymlq.jl
+++ b/src/usymlq.jl
@@ -17,7 +17,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, November 2018.
 
-export usymlq
+export usymlq, usymlq!
 
 """
     (x, stats) = usymlq(A, b::AbstractVector{T}, c::AbstractVector{T};
@@ -42,9 +42,14 @@ when it exists. The transfer is based on the residual norm.
 * A. Buttari, D. Orban, D. Ruiz and D. Titley-Peloquin, *A tridiagonalization method for symmetric saddle-point and quasi-definite systems*, SIAM Journal on Scientific Computing, 41(5), pp. 409--432, 2019.
 * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
 """
-function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T};
-                atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
-                itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = UsymlqSolver(A, b)
+  usymlq!(solver, A, b, c; kwargs...)
+end
+
+function usymlq!(solver :: UsymlqSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T};
+                 atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
+                 itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   length(b) == m || error("Inconsistent problem size")
@@ -53,16 +58,18 @@ function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  uₖ₋₁, uₖ, x, d̅, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.x, solver.d̅, solver.vₖ₋₁, solver.vₖ
 
   # Initial solution x₀ and residual norm ‖r₀‖.
-  x = kzeros(S, n)
-  bNorm = @knrm2(m, b)  # ‖r₀‖
+  x .= zero(T)
+  bNorm = @knrm2(m, b)
   bNorm == 0 && return (x, SimpleStats(true, false, [bNorm], T[], "x = 0 is a zero-residual solution"))
 
   iter = 0
@@ -73,16 +80,15 @@ function usymlq(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   (verbose > 0) && @printf("%5s  %7s\n", "k", "‖rₖ‖")
   display(iter, verbose) && @printf("%5d  %7.1e\n", iter, bNorm)
 
-  # Set up workspace.
   βₖ = @knrm2(m, b)          # β₁ = ‖v₁‖
   γₖ = @knrm2(n, c)          # γ₁ = ‖u₁‖
-  vₖ₋₁ = kzeros(S, m)        # v₀ = 0
-  uₖ₋₁ = kzeros(S, n)        # u₀ = 0
-  vₖ = b / βₖ                # v₁ = b / β₁
-  uₖ = c / γₖ                # u₁ = c / γ₁
+  vₖ₋₁ .= zero(T)            # v₀ = 0
+  uₖ₋₁ .= zero(T)            # u₀ = 0
+  vₖ .= b ./ βₖ              # v₁ = b / β₁
+  uₖ .= c ./ γₖ              # u₁ = c / γ₁
   cₖ₋₁ = cₖ = -one(T)        # Givens cosines used for the LQ factorization of Tₖ
   sₖ₋₁ = sₖ = zero(T)        # Givens sines used for the LQ factorization of Tₖ
-  d̅ = kzeros(S, n)           # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
+  d̅ .= zero(T)               # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
   ζₖ₋₁ = ζbarₖ = zero(T)     # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
   ζₖ₋₂ = ηₖ = zero(T)        # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
   δbarₖ₋₁ = δbarₖ = zero(T)  # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations
diff --git a/src/usymqr.jl b/src/usymqr.jl
index bb739b9dc..6cf3d7b7a 100644
--- a/src/usymqr.jl
+++ b/src/usymqr.jl
@@ -17,7 +17,7 @@
 # Alexis Montoison, <alexis.montoison@polymtl.ca>
 # Montreal, November 2018.
 
-export usymqr
+export usymqr, usymqr!
 
 """
     (x, stats) = usymqr(A, b::AbstractVector{T}, c::AbstractVector{T};
@@ -39,9 +39,14 @@ USYMQR finds the minimum-norm solution if problems are inconsistent.
 * A. Buttari, D. Orban, D. Ruiz and D. Titley-Peloquin, *A tridiagonalization method for symmetric saddle-point and quasi-definite systems*, SIAM Journal on Scientific Computing, 41(5), pp. 409--432, 2019.
 * A. Montoison and D. Orban, *BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*, SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
 """
-function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
-                atol :: T=√eps(T), rtol :: T=√eps(T),
-                itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where T <: AbstractFloat
+function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat
+  solver = UsymqrSolver(A, b)
+  usymqr!(solver, A, b, c; kwargs...)
+end
+
+function usymqr!(solver :: UsymqrSolver{T,S}, A, b :: AbstractVector{T}, c :: AbstractVector{T};
+                 atol :: T=√eps(T), rtol :: T=√eps(T),
+                 itmax :: Int=0, verbose :: Int=0, history :: Bool=false) where {S, T <: AbstractFloat}
 
   m, n = size(A)
   length(b) == m || error("Inconsistent problem size")
@@ -50,15 +55,17 @@ function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
 
   # Check type consistency
   eltype(A) == T || error("eltype(A) ≠ $T")
+  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(c) == S || error("ktypeof(c) ≠ $S")
 
   # Compute the adjoint of A
   Aᵀ = A'
 
-  # Determine the storage type of b
-  S = typeof(b)
+  # Set up workspace.
+  vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ = solver.vₖ₋₁, solver.vₖ, solver.x, solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ
 
   # Initial solution x₀ and residual norm ‖r₀‖.
-  x = kzeros(S, n)
+  x .= zero(T)
   rNorm = @knrm2(m, b)
   rNorm == 0 && return x, SimpleStats(true, false, [rNorm], T[], "x = 0 is a zero-residual solution")
 
@@ -72,17 +79,16 @@ function usymqr(A, b :: AbstractVector{T}, c :: AbstractVector{T};
   (verbose > 0) && @printf("%5s  %7s  %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖")
   display(iter, verbose) && @printf("%5d  %7.1e  %7s\n", iter, rNorm, "✗ ✗ ✗ ✗")
 
-  # Set up workspace.
   βₖ = @knrm2(m, b)           # β₁ = ‖v₁‖
   γₖ = @knrm2(n, c)           # γ₁ = ‖u₁‖
-  vₖ₋₁ = kzeros(S, m)         # v₀ = 0
-  uₖ₋₁ = kzeros(S, n)         # u₀ = 0
-  vₖ = b / βₖ                 # v₁ = b / β₁
-  uₖ = c / γₖ                 # u₁ = c / γ₁
+  vₖ₋₁ .= zero(T)             # v₀ = 0
+  uₖ₋₁ .= zero(T)             # u₀ = 0
+  vₖ .= b ./ βₖ               # v₁ = b / β₁
+  uₖ .= c ./ γₖ               # u₁ = c / γ₁
   cₖ₋₂ = cₖ₋₁ = cₖ = zero(T)  # Givens cosines used for the QR factorization of Tₖ₊₁.ₖ
   sₖ₋₂ = sₖ₋₁ = sₖ = zero(T)  # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
-  wₖ₋₂ = kzeros(S, n)         # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹
-  wₖ₋₁ = kzeros(S, n)         # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹
+  wₖ₋₂ .= zero(T)             # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹
+  wₖ₋₁ .= zero(T)             # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹
   ζbarₖ = βₖ                  # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
 
   # Stopping criterion.
diff --git a/src/variants.jl b/src/variants.jl
index fb4049433..33a560b48 100644
--- a/src/variants.jl
+++ b/src/variants.jl
@@ -28,49 +28,31 @@ end
 # Variants where matrix-vector products with A and Aᵀ are required
 for fn in (:cgls, :cgne, :lnlq, :craig, :craigmr, :crls, :crmr, :lslq, :lsqr, :lsmr, :bilq, :qmr)
   @eval begin
-    $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A), convert(Vector{T}, b); wrap_preconditioners(kwargs, Vector{T})...)
-
     $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A, storagetype=typeof(b)), b; wrap_preconditioners(kwargs, typeof(b))...)
+      $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b)), b; wrap_preconditioners(kwargs, ktypeof(b))...)
   end
 end
 
 # Variants for USYMLQ, USYMQR, TriCG, TriMR, TriLQR and BiLQR
 for fn in (:usymlq, :usymqr, :tricg, :trimr, :trilqr, :bilqr)
   @eval begin
-    $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}, c :: SparseVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A), convert(Vector{T}, b), convert(Vector{T}, c); wrap_preconditioners(kwargs, Vector{T})...)
-
-    $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}, c :: SparseVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A), b, convert(Vector{T}, c); wrap_preconditioners(kwargs, Vector{T})...)
-
-    $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A), convert(Vector{T}, b), c; wrap_preconditioners(kwargs, Vector{T})...)
-
     $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}, c :: AbstractVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A, storagetype=typeof(b)), b, c; wrap_preconditioners(kwargs, typeof(b))...)
+      $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b)), b, c; wrap_preconditioners(kwargs, ktypeof(b))...)
   end
 end
 
 # Variants where matrix-vector products with A are only required
 for fn in (:cg_lanczos, :cg, :cr, :minres, :minres_qlp, :symmlq, :cgs, :bicgstab, :diom, :dqgmres)
   @eval begin
-    $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A, symmetric=true), convert(Vector{T}, b); wrap_preconditioners(kwargs, Vector{T})...)
-
     $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A, storagetype=typeof(b), symmetric=true), b; wrap_preconditioners(kwargs, typeof(b))...)
+      $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b), symmetric=true), b; wrap_preconditioners(kwargs, ktypeof(b))...)
   end
 end
 
 # Variants for CG-LANCZOS-SHIFT-SEQ
 for fn in [:cg_lanczos_shift_seq]
   @eval begin
-    $fn(A :: AbstractMatrix{T}, b :: SparseVector{T}, shifts :: AbstractVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A, symmetric=true), convert(Vector{T}, b), shifts; wrap_preconditioners(kwargs, Vector{T})...)
-
     $fn(A :: AbstractMatrix{T}, b :: AbstractVector{T}, shifts :: AbstractVector{T}; kwargs...) where T <: AbstractFloat =
-      $fn(PreallocatedLinearOperator(A, storagetype=typeof(b), symmetric=true), b, shifts; wrap_preconditioners(kwargs, typeof(b))...)
+      $fn(PreallocatedLinearOperator(A, storagetype=ktypeof(b), symmetric=true), b, shifts; wrap_preconditioners(kwargs, ktypeof(b))...)
   end
 end
diff --git a/test/test_alloc.jl b/test/test_alloc.jl
index 421cf1587..b5e2eb146 100644
--- a/test/test_alloc.jl
+++ b/test/test_alloc.jl
@@ -30,6 +30,11 @@ function test_alloc()
   actual_symmlq_bytes = @allocated symmlq(A, b)
   @test actual_symmlq_bytes ≤ 1.1 * expected_symmlq_bytes
 
+  solver = SymmlqSolver(A, b)
+  symmlq!(solver, A, b)  # warmup
+  inplace_symmlq_bytes = @allocated symmlq!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_symmlq_bytes == 672)
+
   # without preconditioner and with Ap preallocated, CG needs 3 n-vectors: x, r, p
   storage_cg(n) = 3 * n
   storage_cg_bytes(n) = 8 * storage_cg(n)
@@ -39,6 +44,11 @@ function test_alloc()
   actual_cg_bytes = @allocated cg(A, b)
   @test actual_cg_bytes ≤ 1.1 * expected_cg_bytes
 
+  solver = CgSolver(A, b)
+  cg!(solver, A, b)  # warmup
+  inplace_cg_bytes = @allocated cg!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_cg_bytes == 208)
+
   # without preconditioner and with Ap preallocated, MINRES needs 5 n-vectors: x, r1, r2, w1, w2
   storage_minres(n) = 5 * n
   storage_minres_bytes(n) = 8 * storage_minres(n)
@@ -67,6 +77,11 @@ function test_alloc()
   actual_diom_bytes = @allocated diom(A, b, memory=mem)
   @test actual_diom_bytes ≤ 1.05 * expected_diom_bytes
 
+  solver = DiomSolver(A, b)
+  diom!(solver, A, b)  # warmup
+  inplace_diom_bytes = @allocated diom!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_diom_bytes == 208)
+
   # with Ap preallocated, CG_LANCZOS needs 4 n-vectors: x, v, v_prev, p
   storage_cg_lanczos(n) = 4 * n
   storage_cg_lanczos_bytes(n) = 8 * storage_cg_lanczos(n)
@@ -76,12 +91,17 @@ function test_alloc()
   actual_cg_lanczos_bytes = @allocated cg_lanczos(A, b)
   @test actual_cg_lanczos_bytes ≤ 1.1 * expected_cg_lanczos_bytes
 
+  solver = CgLanczosSolver(A, b)
+  cg_lanczos!(solver, A, b)  # warmup
+  inplace_cg_lanczos_bytes = @allocated cg_lanczos!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_cg_lanczos_bytes == 144)
+
   # with Ap preallocated, CG_LANCZOS_SHIFT_SEQ needs:
   # - 2 n-vectors: v, v_prev
   # - 2 (n*nshifts)-matrices: x, p
   # - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms
-  # - 2 nshifts-bitArray: indefinite, converged
-  storage_cg_lanczos_shift_seq(n, nshifts) = (2 * n) + (2 * n * nshifts) + (5 * nshifts) + (2 * nshifts / 64)
+  # - 3 nshifts-bitArray: indefinite, converged, not_cv
+  storage_cg_lanczos_shift_seq(n, nshifts) = (2 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64)
   storage_cg_lanczos_shift_seq_bytes(n, nshifts) = 8 * storage_cg_lanczos_shift_seq(n, nshifts)
 
   expected_cg_lanczos_shift_seq_bytes = storage_cg_lanczos_shift_seq_bytes(n, nshifts)
@@ -89,6 +109,12 @@ function test_alloc()
   actual_cg_lanczos_shift_seq_bytes = @allocated cg_lanczos_shift_seq(A, b, shifts)
   @test actual_cg_lanczos_shift_seq_bytes ≤ 1.1 * expected_cg_lanczos_shift_seq_bytes
 
+  solver = CgLanczosShiftSolver(A, b, shifts)
+  cg_lanczos_shift_seq!(solver, A, b, shifts)  # warmup
+  inplace_cg_lanczos_shift_seq_bytes = @allocated cg_lanczos_shift_seq!(solver, A, b, shifts)
+  println(inplace_cg_lanczos_shift_seq_bytes)
+  @test (VERSION < v"1.5") || (inplace_cg_lanczos_shift_seq_bytes == 320)
+
   # without preconditioner and with Ap preallocated, DQGMRES needs:
   # - 1 n-vector: x
   # - 2 (n*mem)-matrices: P, V
@@ -102,6 +128,11 @@ function test_alloc()
   actual_dqgmres_bytes = @allocated dqgmres(A, b, memory=mem)
   @test actual_dqgmres_bytes ≤ 1.05 * expected_dqgmres_bytes
 
+  solver = DqgmresSolver(A, b)
+  dqgmres!(solver, A, b)  # warmup
+  inplace_dqgmres_bytes = @allocated dqgmres!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_dqgmres_bytes == 208)
+
   # without preconditioner and with Ap preallocated, CR needs 4 n-vectors: x, r, p, q
   storage_cr(n) = 4 * n
   storage_cr_bytes(n) = 8 * storage_cr(n)
@@ -111,6 +142,11 @@ function test_alloc()
   actual_cr_bytes = @allocated cr(A, b, rtol=1e-6)
   @test actual_cr_bytes ≤ 1.1 * expected_cr_bytes
 
+  solver = CrSolver(A, b)
+  cr!(solver, A, b, rtol=1e-6)  # warmup
+  inplace_cr_bytes = @allocated cr!(solver, A, b, rtol=1e-6)
+  @test (VERSION < v"1.5") || (inplace_cr_bytes == 208)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, CRMR needs:
   # - 2 n-vectors: x, p
   # - 1 m-vector: r
@@ -122,6 +158,11 @@ function test_alloc()
   actual_crmr_bytes = @allocated crmr(Au, c)
   @test actual_crmr_bytes ≤ 1.1 * expected_crmr_bytes
 
+  solver = CrmrSolver(Au, c)
+  crmr!(solver, Au, c)  # warmup
+  inplace_crmr_bytes = @allocated crmr!(solver, Au, c)
+  @test (VERSION < v"1.5") || (inplace_crmr_bytes == 208)
+
   # without preconditioner and with Ap preallocated, CGS needs 5 n-vectors: x, r, u, p, q
   storage_cgs(n) = 5 * n
   storage_cgs_bytes(n) = 8 * storage_cgs(n)
@@ -131,6 +172,11 @@ function test_alloc()
   actual_cgs_bytes = @allocated cgs(A, b)
   @test actual_cgs_bytes ≤ 1.1 * expected_cgs_bytes
 
+  solver = CgsSolver(A, b)
+  cgs!(solver, A, b)  # warmup
+  inplace_cgs_bytes = @allocated cgs!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_cgs_bytes == 208)
+
   # without preconditioner and with Ap preallocated, BICGSTAB needs 5 n-vectors: x, r, p, v, s
   storage_bicgstab(n) = 5 * n
   storage_bicgstab_bytes(n) = 8 * storage_bicgstab(n)
@@ -140,6 +186,11 @@ function test_alloc()
   actual_bicgstab_bytes = @allocated bicgstab(A, b)
   @test actual_bicgstab_bytes ≤ 1.1 * expected_bicgstab_bytes
 
+  solver = BicgstabSolver(A, b)
+  bicgstab!(solver, A, b)  # warmup
+  inplace_bicgstab_bytes = @allocated bicgstab!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_bicgstab_bytes == 208)
+
   # with (Ap, Aᵀq) preallocated, CRAIGMR needs:
   # - 2 n-vector: x, v
   # - 4 m-vectors: y, u, w, wbar
@@ -151,6 +202,11 @@ function test_alloc()
   actual_craigmr_bytes = @allocated craigmr(Au, c)
   @test actual_craigmr_bytes ≤ 1.1 * expected_craigmr_bytes
 
+  solver = CraigmrSolver(Au, c)
+  craigmr!(solver, Au, c)  # warmup
+  inplace_craigmr_bytes = @allocated craigmr!(solver, Au, c)
+  @test (VERSION < v"1.5") || (inplace_craigmr_bytes == 208)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, CGNE needs:
   # - 2 n-vectors: x, p
   # - 1 m-vector: r
@@ -162,6 +218,11 @@ function test_alloc()
   actual_cgne_bytes = @allocated cgne(Au, c)
   @test actual_cgne_bytes ≤ 1.1 * expected_cgne_bytes
 
+  solver = CgneSolver(Au, c)
+  cgne!(solver, Au, c)  # warmup
+  inplace_cgne_bytes = @allocated cgne!(solver, Au, c)
+  @test (VERSION < v"1.5") || (inplace_cgne_bytes == 208)
+
   # with (Ap, Aᵀq) preallocated, LNLQ needs:
   # - 2 n-vector: x, v
   # - 3 m-vectors: y, w̄, u
@@ -173,6 +234,11 @@ function test_alloc()
   actual_lnlq_bytes = @allocated lnlq(Au, c)
   @test actual_lnlq_bytes ≤ 1.1 * expected_lnlq_bytes
 
+  solver = LnlqSolver(Au, c)
+  lnlq!(solver, Au, c)  # warmup
+  inplace_lnlq_bytes = @allocated lnlq!(solver, Au, c)
+  @test (VERSION < v"1.5") || (inplace_lnlq_bytes == 208)
+
   # with (Ap, Aᵀq) preallocated, CRAIG needs:
   # - 2 n-vector: x, v
   # - 3 m-vectors: y, w, u
@@ -184,6 +250,11 @@ function test_alloc()
   actual_craig_bytes = @allocated craig(Au, c)
   @test actual_craig_bytes ≤ 1.1 * expected_craig_bytes
 
+  solver = CraigSolver(Au, c)
+  craig!(solver, Au, c)  # warmup
+  inplace_craig_bytes = @allocated craig!(solver, Au, c)
+  @test (VERSION < v"1.5") || (inplace_craig_bytes == 208)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, LSLQ needs:
   # - 3 m-vectors: x_lq, v, w̄ (= x_cg)
   # - 1 n-vector: u
@@ -195,6 +266,11 @@ function test_alloc()
   actual_lslq_bytes = @allocated lslq(Ao, b)
   @test actual_lslq_bytes ≤ 1.1 * expected_lslq_bytes
 
+  solver = LslqSolver(Ao, b)
+  lslq!(solver, Ao, b)  # warmup
+  inplace_lslq_bytes = @allocated lslq!(solver, Ao, b)
+  @test (VERSION < v"1.5") || (inplace_lslq_bytes == 576)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, CGLS needs:
   # - 2 m-vectors: x, p
   # - 1 n-vector: r
@@ -206,6 +282,11 @@ function test_alloc()
   actual_cgls_bytes = @allocated cgls(Ao, b)
   @test actual_cgls_bytes ≤ 1.1 * expected_cgls_bytes
 
+  solver = CglsSolver(Ao, b)
+  cgls!(solver, Ao, b)  # warmup
+  inplace_cgls_bytes = @allocated cgls!(solver, Ao, b)
+  @test (VERSION < v"1.5") || (inplace_cgls_bytes == 208)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, LSQR needs:
   # - 3 m-vectors: x, v, w
   # - 1 n-vector: u
@@ -217,6 +298,11 @@ function test_alloc()
   actual_lsqr_bytes = @allocated lsqr(Ao, b)
   @test actual_lsqr_bytes ≤ 1.1 * expected_lsqr_bytes
 
+  solver = LsqrSolver(Ao, b)
+  lsqr!(solver, Ao, b)  # warmup
+  inplace_lsqr_bytes = @allocated lsqr!(solver, Ao, b)
+  @test (VERSION < v"1.5") || (inplace_lsqr_bytes == 432)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, CRLS needs:
   # - 3 m-vectors: x, p, Ar
   # - 2 n-vector: r, Ap
@@ -228,6 +314,11 @@ function test_alloc()
   actual_crls_bytes = @allocated crls(Ao, b)
   @test actual_crls_bytes ≤ 1.1 * expected_crls_bytes
 
+  solver = CrlsSolver(Ao, b)
+  crls!(solver, Ao, b)  # warmup
+  inplace_crls_bytes = @allocated crls!(solver, Ao, b)
+  @test (VERSION < v"1.5") || (inplace_crls_bytes == 208)
+
   # without preconditioner and with (Ap, Aᵀq) preallocated, LSMR needs:
   # - 4 m-vectors: x, v, h, hbar
   # - 1 n-vector: u
@@ -239,6 +330,11 @@ function test_alloc()
   actual_lsmr_bytes = @allocated lsmr(Ao, b)
   @test actual_lsmr_bytes ≤ 1.1 * expected_lsmr_bytes
 
+  solver = LsmrSolver(Ao, b)
+  lsmr!(solver, Ao, b)  # warmup
+  inplace_lsmr_bytes = @allocated lsmr!(solver, Ao, b)
+  @test (VERSION < v"1.5") || (inplace_lsmr_bytes == 336)
+
   # with (Ap, Aᵀq) preallocated, USYMQR needs:
   # - 5 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ
   # - 2 n-vectors: uₖ₋₁, uₖ
@@ -250,6 +346,11 @@ function test_alloc()
   actual_usymqr_bytes = @allocated usymqr(Ao, b, c)
   @test actual_usymqr_bytes ≤ 1.1 * expected_usymqr_bytes
 
+  solver = UsymqrSolver(Ao, b)
+  usymqr!(solver, Ao, b, c)  # warmup
+  inplace_usymqr_bytes = @allocated usymqr!(solver, Ao, b, c)
+  @test (VERSION < v"1.5") || (inplace_usymqr_bytes == 208)
+
   # with (Ap, Aᵀq) preallocated, TRILQR needs:
   # - 5 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ
   # - 4 n-vectors: uₖ₋₁, uₖ, x, d̅
@@ -261,6 +362,11 @@ function test_alloc()
   actual_trilqr_bytes = @allocated trilqr(A, b, b)
   @test actual_trilqr_bytes ≤ 1.1 * expected_trilqr_bytes
 
+  solver = TrilqrSolver(A, b)
+  trilqr!(solver, A, b, b)  # warmup
+  inplace_trilqr_bytes = @allocated trilqr!(solver, A, b, b)
+  @test (VERSION < v"1.5") || (inplace_trilqr_bytes == 208)
+
   # with (Ap, Aᵀq) preallocated, BILQ needs:
   # - 6 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅
   storage_bilq(n) = 6 * n
@@ -271,6 +377,11 @@ function test_alloc()
   actual_bilq_bytes = @allocated bilq(A, b)
   @test actual_bilq_bytes ≤ 1.1 * expected_bilq_bytes
 
+  solver = BilqSolver(A, b)
+  bilq!(solver, A, b)  # warmup
+  inplace_bilq_bytes = @allocated bilq!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_bilq_bytes == 208)
+
   # with (Ap, Aᵀq) preallocated, BILQR needs:
   # - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ
   storage_bilqr(n) = 9 * n
@@ -281,6 +392,11 @@ function test_alloc()
   actual_bilqr_bytes = @allocated bilqr(A, b, b)
   @test actual_bilqr_bytes ≤ 1.1 * expected_bilqr_bytes
 
+  solver = BilqrSolver(A, b)
+  bilqr!(solver, A, b, b)  # warmup
+  inplace_bilqr_bytes = @allocated bilqr!(solver, A, b, b)
+  @test (VERSION < v"1.5") || (inplace_bilqr_bytes == 208)
+
   # with Ap preallocated, MINRES-QLP needs:
   # - 5 n-vectors: wₖ₋₁, wₖ, vₖ₋₁, vₖ, x
   storage_minres_qlp(n) = 5 * n
@@ -291,6 +407,11 @@ function test_alloc()
   actual_minres_qlp_bytes = @allocated minres_qlp(A, b)
   @test actual_minres_qlp_bytes ≤ 1.1 * expected_minres_qlp_bytes
 
+  solver = MinresQlpSolver(A, b)
+  minres_qlp!(solver, A, b)  # warmup
+  inplace_minres_qlp_bytes = @allocated minres_qlp!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_minres_qlp_bytes == 208)
+
   # with (Ap, Aᵗp) preallocated, QMR needs:
   # - 7 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ
   storage_qmr(n) = 7 * n
@@ -301,6 +422,11 @@ function test_alloc()
   actual_qmr_bytes = @allocated qmr(A, b)
   @test actual_qmr_bytes ≤ 1.1 * expected_qmr_bytes
 
+  solver = QmrSolver(A, b)
+  qmr!(solver, A, b)  # warmup
+  inplace_qmr_bytes = @allocated qmr!(solver, A, b)
+  @test (VERSION < v"1.5") || (inplace_qmr_bytes == 208)
+
   # with (Ap, Aᵀp) preallocated, USYMLQ needs:
   # - 4 n-vectors: uₖ₋₁, uₖ, x, d̅
   # - 2 m-vectors: vₖ₋₁, vₖ
@@ -312,6 +438,11 @@ function test_alloc()
   actual_usymlq_bytes = @allocated usymlq(Au, c, b)
   @test actual_usymlq_bytes ≤ 1.1 * expected_usymlq_bytes
 
+  solver = UsymlqSolver(Au, c)
+  usymlq!(solver, Au, c, b)  # warmup
+  inplace_usymlq_bytes = @allocated usymlq!(solver, Au, c, b)
+  @test (VERSION < v"1.5") || (inplace_usymlq_bytes == 208)
+
   # with (Ap, Aᵀp) preallocated, TriCG needs:
   # - 5 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ
   # - 5 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ
@@ -323,6 +454,11 @@ function test_alloc()
   actual_tricg_bytes = @allocated tricg(Au, c, b)
   @test actual_tricg_bytes ≤ 1.1 * expected_tricg_bytes
 
+  solver = TricgSolver(Au, c)
+  tricg!(solver, Au, c, b)  # warmup
+  inplace_tricg_bytes = @allocated tricg!(solver, Au, c, b)
+  @test (VERSION < v"1.5") || (inplace_tricg_bytes == 208)
+
   # with (Ap, Aᵀp) preallocated, TriMR needs:
   # - 7 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ
   # - 7 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ
@@ -333,6 +469,11 @@ function test_alloc()
   trimr(Au, c, b)  # warmup
   actual_trimr_bytes = @allocated trimr(Au, c, b)
   @test actual_trimr_bytes ≤ 1.1 * expected_trimr_bytes
+
+  solver = TrimrSolver(Au, c)
+  trimr!(solver, Au, c, b)  # warmup
+  inplace_trimr_bytes = @allocated trimr!(solver, Au, c, b)
+  @test (VERSION < v"1.5") || (inplace_trimr_bytes == 208)
 end
 
 @testset "alloc" begin
diff --git a/test/test_aux.jl b/test/test_aux.jl
index e8dcf16ab..c02981ee7 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -93,12 +93,17 @@
   @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178
 
   # test kzeros and kones
-  Krylov.kzeros(Vector{Float64}, 10) == zeros(10)
-  Krylov.kones(Vector{Float64}, 10) == ones(10)
-
-  a = rand(10)
-  b = view(a, 1:4)
-  S = typeof(b)
-  Krylov.kzeros(S, 10) == zeros(10)
-  Krylov.kones(S, 10) == ones(10)
+  @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10)
+  @test Krylov.kones(Vector{Float64}, 10) == ones(10)
+
+  # test ktypeof
+  a = sprand(Float32, 10, 0.5)
+  b = view(a, 4:8)
+  @test Krylov.ktypeof(a) == Vector{Float32}
+  @test Krylov.ktypeof(b) == Vector{Float32}
+
+  a = sprand(Float64, 10, 0.5)
+  b = view(a, 4:8)
+  @test Krylov.ktypeof(a) == Vector{Float64}
+  @test Krylov.ktypeof(b) == Vector{Float64}
 end
diff --git a/test/test_lslq.jl b/test/test_lslq.jl
index ef11edc23..de5b1b91b 100644
--- a/test/test_lslq.jl
+++ b/test/test_lslq.jl
@@ -24,7 +24,7 @@
   V, _ = qr(rand(4, 4))
   A = U * [Σ ; zeros(2, 4)] * V'
   b = ones(6)
-  (x, x_cg, err_lbnds, err_ubnds_lq, err_ubnds_cg, stats) = lslq(A, b, σ=1 - 1.0e-10)
+  (x, x_cg, err_lbnds, err_ubnds_lq, err_ubnds_cg, stats) = lslq(A, b, σ=1 - 1.0e-10, history=true)
   @test isapprox(err_ubnds_lq[end], 0.0, atol=sqrt(eps(Float64)))
   @test isapprox(err_ubnds_cg[end], 0.0, atol=sqrt(eps(Float64)))
   x_exact = A \ b
diff --git a/test/test_variants.jl b/test/test_variants.jl
index ac6cbbce1..e0e6a3e87 100644
--- a/test/test_variants.jl
+++ b/test/test_variants.jl
@@ -9,8 +9,9 @@
         A_sparse = convert(SparseMatrixCSC{T,S}, A_dense)
         b_dense = ones(T, 5)
         b_sparse = convert(SparseVector{T,S}, b_dense)
+        b_view = view(b_dense, 1:5)
         for A in (A_dense, A_sparse)
-          for b in (b_dense, b_sparse)
+          for b in (b_dense, b_sparse, b_view)
             if fn == :cg_lanczos_shift_seq
               shifts = [-one(T), one(T)]
               @eval $fn($A, $b, $shifts)
@@ -19,7 +20,8 @@
             elseif fn in (:usymlq, :usymqr, :tricg, :trimr, :trilqr, :bilqr)
               c_dense = ones(T, 5)
               c_sparse = convert(SparseVector{T,S}, c_dense)
-              for c in (c_dense, c_sparse)
+              c_view = view(c_dense, 1:5)
+              for c in (c_dense, c_sparse, c_view)
                 @eval $fn($A, $b, $c)
                 @eval $fn($transpose($A), $b, $c)
                 @eval $fn($adjoint($A), $b, $c)