diff --git a/stdlib/InteractiveUtils/Project.toml b/stdlib/InteractiveUtils/Project.toml
index 2dfe098c56de8..e13902375e005 100644
--- a/stdlib/InteractiveUtils/Project.toml
+++ b/stdlib/InteractiveUtils/Project.toml
@@ -3,7 +3,6 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 
 [deps]
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 378fa40626eb7..b85403f581e1b 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -3,7 +3,7 @@ module InteractiveUtils
 
 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo,
-    versioninfo, subtypes, peakflops, @which, @edit, @less, @functionloc, @code_warntype,
+    versioninfo, subtypes, @which, @edit, @less, @functionloc, @code_warntype,
     @code_typed, @code_lowered, @code_llvm, @code_native, clipboard
 
 import Base.Docs.apropos
@@ -12,7 +12,6 @@ using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, show_unquoted,
     to_tuple_type, signature_type, format_bytes
 
 using Markdown
-using LinearAlgebra # for peakflops
 
 include("editless.jl")
 include("codeview.jl")
@@ -308,36 +307,25 @@ function dumpsubtypes(io::IO, x::DataType, m::Module, n::Int, indent)
     nothing
 end
 
-const Distributed_modref = Ref{Module}()
-
+# TODO: @deprecate peakflops to LinearAlgebra
+export peakflops
 """
     peakflops(n::Integer=2000; parallel::Bool=false)
 
 `peakflops` computes the peak flop rate of the computer by using double precision
-[`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it
-multiplies a matrix of size `n x n`, where `n = 2000`. If the underlying BLAS is using
-multiple threads, higher flop rates are realized. The number of BLAS threads can be set with
-[`BLAS.set_num_threads(n)`](@ref).
-
-If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all
-the worker processors. The flop rate of the entire parallel computer is returned. When
-running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size
-of the problem that is solved on each processor.
+[`gemm!`](@ref LinearAlgebra.BLAS.gemm!). For more information see
+[`LinearAlgebra.peakflops`](@ref).
+
+!!! note
+    This function will move to the `LinearAlgebra` standard library in the
+    future, and is already available as `LinearAlgebra.peakflops`.
 """
 function peakflops(n::Integer=2000; parallel::Bool=false)
-    a = fill(1.,100,100)
-    t = @elapsed a2 = a*a
-    a = fill(1.,n,n)
-    t = @elapsed a2 = a*a
-    @assert a2[1,1] == n
-    if parallel
-        if !isassigned(Distributed_modref)
-            Distributed_modref[] = Base.require(Base, :Distributed)
-        end
-        Dist = Distributed_modref[]
-        sum(Dist.pmap(peakflops, fill(n, Dist.nworkers())))
-    else
-        2*Float64(n)^3 / t
+    # Base.depwarn("`peakflop`s have moved to the LinearAlgebra module, " *
+    #     "add `using LinearAlgebra` to your imports.", :peakflops)
+    let LinearAlgebra = Base.require(Base.PkgId(
+            Base.UUID((0x37e2e46d_f89d_539d,0xb4ee_838fcccc9c8e)), "LinearAlgebra"))
+        return LinearAlgebra.peakflops(n; parallel = parallel)
     end
 end
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
index 7717baa2ad708..e941069c13e01 100644
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ b/stdlib/LinearAlgebra/docs/src/index.md
@@ -415,6 +415,7 @@ LinearAlgebra.adjoint!
 Base.copy(::Union{Transpose,Adjoint})
 LinearAlgebra.stride1
 LinearAlgebra.checksquare
+LinearAlgebra.peakflops
 ```
 
 ## Low-level matrix operations
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index c58f98dd83f31..e30faea9e67bc 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -379,6 +379,36 @@ const ⋅ = dot
 const × = cross
 export ⋅, ×
 
+"""
+    LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false)
+
+`peakflops` computes the peak flop rate of the computer by using double precision
+[`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it
+multiplies a matrix of size `n x n`, where `n = 2000`. If the underlying BLAS is using
+multiple threads, higher flop rates are realized. The number of BLAS threads can be set with
+[`BLAS.set_num_threads(n)`](@ref).
+
+If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all
+the worker processors. The flop rate of the entire parallel computer is returned. When
+running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size
+of the problem that is solved on each processor.
+"""
+function peakflops(n::Integer=2000; parallel::Bool=false)
+    a = fill(1.,100,100)
+    t = @elapsed a2 = a*a
+    a = fill(1.,n,n)
+    t = @elapsed a2 = a*a
+    @assert a2[1,1] == n
+    if parallel
+        let Distributed = Base.require(Base.PkgId(
+                Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
+            return sum(Distributed.pmap(peakflops, fill(n, Distributed.nworkers())))
+        end
+    else
+        return 2*Float64(n)^3 / t
+    end
+end
+
 function versioninfo(io::IO=stdout)
     if Base.libblas_name == "libopenblas" || BLAS.vendor() == :openblas || BLAS.vendor() == :openblas64
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
index 8591a07aa9f8f..33ae3ddfd0c4c 100644
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ b/stdlib/LinearAlgebra/test/generic.jl
@@ -382,4 +382,8 @@ end
     @test ismissing(norm(missing))
 end
 
+@testset "peakflops" begin
+    @test LinearAlgebra.peakflops() > 0
+end
+
 end # module TestGeneric