diff --git a/NEWS.md b/NEWS.md
index 2df1ea68b030a..c45d6e369aab6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -985,6 +985,9 @@ Deprecated or removed
     argument instead of defaulting to using the first dimension unless there is only
     one dimension ([#24684], [#25457]).
 
+  * `cumsum` and `cumprod` have the same promotion behaviour for small integer types as `sum` and
+    `prod`. Use `accumulate(+, x)`/`accumulate(*,x)` to get non-promoting behaviour ([#25766]).
+
   * The `sum_kbn` and `cumsum_kbn` functions have been moved to the
     [KahanSummation](https://github.com/JuliaMath/KahanSummation.jl) package ([#24869]).
 
@@ -1402,6 +1405,7 @@ Command-line option changes
 [#25725]: https://github.com/JuliaLang/julia/issues/25725
 [#25745]: https://github.com/JuliaLang/julia/issues/25745
 [#25763]: https://github.com/JuliaLang/julia/issues/25763
+[#25766]: https://github.com/JuliaLang/julia/issues/25766
 [#25786]: https://github.com/JuliaLang/julia/issues/25786
 [#25812]: https://github.com/JuliaLang/julia/issues/25812
 [#25815]: https://github.com/JuliaLang/julia/issues/25815
diff --git a/base/accumulate.jl b/base/accumulate.jl
new file mode 100644
index 0000000000000..e640804d2f876
--- /dev/null
+++ b/base/accumulate.jl
@@ -0,0 +1,517 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+    Accumulate(op[, v0], iter)
+
+An iterator which cumulatively applies the binary operation `op` to the iterator `iter`, in a similar manner
+to [`foldl`](@ref) but returning intermediate values.
+
+If an initial `v0` value is provided, then the first value of the iterator will be
+
+    op(v0, first(iter))
+
+Otherwise it will be
+
+    Base.reduce_first(op, first(iter))
+
+See also [`Base.reduce_first`](@ref) and [`accumulate`](@ref).
+"""
+struct Accumulate{O,V,I}
+    op::O
+    v0::V
+    iter::I
+end
+Accumulate(op, iter) = Accumulate(op, undef, iter) # use `undef` as a sentinel
+
+# the following is largely based on Generator objects
+Base.IteratorEltype(acc::Accumulate) = EltypeUnknown()
+Base.IteratorSize(acc::Accumulate) = Base.IteratorSize(acc.iter)
+
+length(acc::Accumulate) = length(acc.iter)
+size(acc::Accumulate)   = size(acc.iter)
+axes(acc::Accumulate)   = axes(acc.iter)
+ndims(acc::Accumulate)  = ndims(acc.iter)
+
+start(acc::Accumulate) = (@_inline_meta; (start(acc.iter), acc.v0))
+done(acc::Accumulate, accstate) = (@_inline_meta; done(acc.iter, accstate[1]))
+function next(acc::Accumulate, accstate)
+    @_inline_meta
+    itrstate, accval = accstate
+    val, itrstate = next(acc.iter, itrstate)
+
+    accval2 = reduce_first(acc.op, accval, val)
+    return accval2, (itrstate, accval2)
+end
+
+
+"""
+    accumulate(op[, v0], itr; dims::Integer)
+
+Cumulative apply binary operation `op` on an iterable `itr`, along the dimension
+`dims`. If an initial `v0` value is provided, then the first value of the iterator will be
+
+    op(v0, first(iter))
+
+Otherwise it will use
+
+    Base.reduce_first(op, first(iter))
+
+The keyword argument `dims` is optional for 1-dimensional iterators.
+
+See also:
+ - [`accumulate!`](@ref) to use a preallocated output array,
+ - [`cumsum`](@ref) and [`cumprod`](@ref) for specialized versions.
+
+# Examples
+```jldoctest
+julia> accumulate(+, [1,2,3])
+3-element Array{Int64,1}:
+ 1
+ 3
+ 6
+
+julia> accumulate(*, [1,2,3])
+3-element Array{Int64,1}:
+ 1
+ 2
+ 6
+
+julia> accumulate(+, 100, [1,2,3])
+3-element Array{Int64,1}:
+ 101
+ 103
+ 106
+
+julia> accumulate(min, 0, [1,2,-1])
+3-element Array{Int64,1}:
+  0
+  0
+ -1
+
+julia> accumulate(+, fill(1, 3, 3); dims=1)
+3×3 Array{Int64,2}:
+ 1  1  1
+ 2  2  2
+ 3  3  3
+
+julia> accumulate(+, fill(1, 3, 3); dims=2)
+3×3 Array{Int64,2}:
+ 1  2  3
+ 1  2  3
+ 1  2  3
+```
+"""
+@inline accumulate(op, itr; dims::Union{Nothing,Integer}=nothing) = accumulate(op, undef, itr; dims=dims)
+@inline accumulate(op, v0, itr; dims::Union{Nothing,Integer}=nothing) = _accumulate(op, v0, itr, IteratorSize(itr), dims)
+
+@inline function _accumulate(op, v0, itr, ::Union{SizeUnknown,HasLength,IsInfinite,HasShape{1}}, ::Nothing)
+    collect(Accumulate(op, v0, itr))
+end
+
+
+"""
+    accumulate!(op, dest[, v0], itr; dims::Integer)
+
+Cumulative operation `op` on an iterator `itr`, storing the result in `dest`, along the
+dimension `dims` (optional for 1-dimensional iterators).
+
+See also:
+ - [`accumulate`](@ref) for the non-inplace version
+ - [`cumsum!`](@ref) and [`cumprod!`](@ref)
+
+# Examples
+``jldoctest
+julia> x = [1, 0, 2, 0, 3];
+
+julia> y = [0, 0, 0, 0, 0];
+
+julia> accumulate!(+, y, x);
+
+julia> y
+5-element Array{Int64,1}:
+ 1
+ 1
+ 3
+ 3
+ 6
+
+julia> A = [1 2; 3 4];
+
+julia> B = [0 0; 0 0];
+
+julia> accumulate!(B, -, A; dims=1);
+
+julia> B
+2×2 Array{Int64,2}:
+  1   2
+ -2  -2
+
+julia> accumulate!(-, B, A; dims=2);
+
+julia> B
+2×2 Array{Int64,2}:
+ 1  -1
+ 3  -1
+```
+"""
+@inline accumulate!(op, dest, itr; dims::Union{Nothing,Integer}=nothing) = accumulate!(op, dest, undef, itr; dims=dims)
+@inline accumulate!(op, dest, v0, itr; dims::Union{Nothing,Integer}=nothing) = _accumulate!(op, dest, undef, itr, IteratorSize(itr), dims)
+
+function _accumulate!(op::F, dest, v0, x, ::Union{SizeUnknown,HasLength,IsInfinite,HasShape{1}}, ::Nothing) where F
+    src = Accumulate(op, v0, x)
+
+    # this is essentially `copyto!`, but unrolled to deal with potential type instability in first state
+    # hopefully won't be necessary with better Union splitting
+    destiter = eachindex(dest)
+
+    sstate0 = start(src)
+    dstate = start(destiter)
+    sdone = done(src, sstate0)
+    ddone = done(destiter, dstate)
+
+    sdone && ddone && return dest
+    (sdone || ddone) && throw(DimensionMismatch("input and output array sizes and indices must match"))
+
+    i, dstate = next(destiter, dstate)
+    s, sstate = next(src, sstate0)
+    while true
+        @inbounds dest[i] = s
+
+        sdone = done(src, sstate)
+        ddone = done(destiter, dstate)
+
+        sdone && ddone && return dest
+        (sdone || ddone) && throw(DimensionMismatch("input and output array sizes and indices must match"))
+
+        i, dstate = next(destiter, dstate)
+        s, sstate = next(src, sstate)
+    end
+end
+
+function _accumulate(op::F, v0, X, ::HasShape{N}, dim::Integer) where {F,N}
+    dim > 0 || throw(ArgumentError("dim must be a positive integer"))
+    if isempty(X)
+        # fallback on collect machinery
+        return collect(Accumulate(op, v0, X))
+    end
+    inds = CartesianIndices(axes(X))
+    first_in_dim = first(axes(X, dim))
+    dim_delta = CartesianIndex(ntuple(d -> d==dim ? 1 : 0, Val{N}()))
+    st = start(inds)
+    i, st = next(inds, st)
+    v = reduce_first(op, v0, X[i])
+    dest = similar(X, typeof(v))
+    dest[i] = v
+    if dim > N
+        return _accum_only_v0!(op, dest, v0, X, inds, st, true)
+    end
+    return _accumulate!(op, dest, v0, X, dim, inds, st, first_in_dim, dim_delta, true)
+end
+
+function _accumulate!(op::F, dest, v0, X, ::HasShape{N}, dim::Integer) where {F,N}
+    dim > 0 || throw(ArgumentError("dim must be a positive integer"))
+    axes(dest) == axes(X) || throw(DimensionMismatch("shape of source and destination must match"))
+    inds = CartesianIndices(axes(X))
+    first_in_dim = first(axes(X, dim))
+    dim_delta = CartesianIndex(ntuple(d -> d==dim ? 1 : 0, Val{N}()))
+    st = start(inds)
+    if dim > N
+        return _accum_only_v0!(op, dest, v0, X, inds, st, false)
+    end
+    return _accumulate!(op, dest, v0, X, dim, inds, st, first_in_dim, dim_delta, false)
+end
+
+function _accumulate!(op::F, dest::AbstractArray{T}, v0, X, dim, inds, st, first_in_dim, dim_delta, widen) where {F,T}
+    while !done(inds, st)
+        i, st = next(inds, st)
+        if @inbounds i[dim] == first_in_dim
+            v = reduce_first(op, v0, @inbounds X[i])
+        else
+            v = op(@inbounds(dest[i - dim_delta]), @inbounds(X[i]))
+        end
+        if !widen || v isa T
+            @inbounds dest[i] = v
+        else
+            R = promote_typejoin(T, typeof(v))
+            dest´ = similar(dest, R)
+            copyto!(dest´, 1, dest, 1, linearindices(dest)[i]-1)
+            @inbounds dest´[i] = v
+            return _accumulate!(op, dest´, v0, X, dim, inds, st, first_in_dim, dim_delta, widen)
+        end
+    end
+    return dest
+end
+
+function _accum_only_v0!(op::F, dest::AbstractArray{T}, v0, X, inds, st, widen) where {F,T}
+    while !done(inds, st)
+        i, st = next(inds, st)
+        v = reduce_first(op, v0, @inbounds X[i])
+        if !widen || v isa T
+            @inbounds dest[i] = v
+        else
+            R = promote_typejoin(T, typeof(v))
+            dest´ = similar(dest, R)
+            copyto!(dest´, 1, dest, 1, linearindices(dest)[i]-1)
+            @inbounds dest´[i] = v
+            return _accum_only_v0!(op, dest´, v0, X, inds, st, widen)
+        end
+    end
+    return dest
+end
+
+
+"""
+    accumulate_pairwise(op[, v0], itr)
+
+Similar to [`accumulate`](@ref), but performs accumulation using a divide-and-conquer
+approach, which can be more numerically accurate for certain operations, such as
+[`+`](@ref).
+
+It involves roughly double the number of `op` calls, but for cheap operations like `+`
+this does not have much impact (approximately 20%).
+"""
+@inline accumulate_pairwise(op, itr) = accumulate_pairwise(op, undef, itr)
+@inline accumulate_pairwise(op, v0, itr) =  accumulate_pairwise(op, v0, itr, IteratorSize(itr))
+function accumulate_pairwise(op::F, v0, itr, ::Union{HasLength,HasShape{1}}) where F
+    i = start(itr)
+    if done(itr,i)
+        return collect(Accumulate(op, v0, itr))
+    end
+    v1,i = next(itr,i)
+    y = reduce_first(op,v0,v1)
+
+    Y = _similar_for(1:1, typeof(y), itr, IteratorSize(itr))
+    L = linearindices(Y)
+    n = length(L)
+    j = first(L)
+
+    while true
+        Y[j] = y
+        if done(itr,i)
+            return Y
+        end
+        y,j,i,wider = _accumulate_pairwise!(op,Y,itr,y,j+1,i,last(L)-j,true)
+        if !wider
+            return Y
+        end
+        R = promote_typejoin(eltype(Y), typeof(y))
+        newY = similar(Y, R)
+        copyto!(newY,1,Y,1,j)
+        Y = newY
+    end
+end
+
+@inline accumulate_pairwise!(op, dest, itr) = accumulate_pairwise!(op, dest, undef, itr)
+function accumulate_pairwise!(op::F, Y, v0, itr) where F
+    L = linearindices(Y)
+    L == linearindices(itr) || throw(DimensionMismatch("indices of source and destination must match"))
+
+    n = length(L)
+
+    i = start(itr)
+    v1,i = next(itr,i)
+
+    j = first(L)
+    Y[j] = y = reduce_first(op,v0,v1)
+    _accumulate_pairwise!(op,Y,itr,y,j+1,i,n-1,false)
+    return Y
+end
+
+function _accumulate_pairwise!(op::F,Y,X,y0,j,i,m,widen) where F
+    if m < 128 # m >= 1
+        @inbounds begin
+            x,i = next(X,i)
+            y = op(y0, x)
+            if widen && !isa(y, eltype(Y))
+                return y, j, i, true
+            end
+            Y[j] = y
+
+            # to keep type stability, could also unroll loop?
+            w = reduce_first(op, x)
+            for k = 1:m-1
+                j += 1
+                x,i = next(X,i)
+                w = op(w,  x)
+                y = op(y0, w)
+                if widen && !isa(y, eltype(Y))
+                    return y, j, i, true
+                end
+                Y[j] = y
+            end
+            return w, j+1, i, false
+        end
+    else
+        m1 = m >> 1
+        v1, j, i, wider = _accumulate_pairwise!(op,Y,X,y0,j,i,m1,widen)
+        wider && return v1, j, i, wider
+        v2, j, i, wider = _accumulate_pairwise!(op,Y,X,op(y0,v1),j,i,m-m1,widen)
+        wider && return v1, j, i, wider
+        return op(v1, v2), j, i, false
+    end
+end
+
+
+
+"""
+    cumsum(A; dims::Integer)
+
+Cumulative sum `A` along the dimension `dims` (`dims` is option for 1-dimensional
+objects). This has the same promotion behaviour as [`sum`](@ref), namely that lower-width
+integer types are promoted to `Int`/`UInt`.
+
+See also:
+ - [`cumsum!`](@ref) to use a preallocated output array, both for performance and
+to control the precision of the output (e.g. to avoid overflow).
+ - [`accumulate`](@ref) for accumulation using other operators.
+
+# Examples
+```jldoctest
+julia> cumsum([1, 1, 1])
+3-element Array{Int64,1}:
+ 1
+ 2
+ 3
+
+julia> cumsum([fill(1, 2) for i in 1:3])
+3-element Array{Array{Int64,1},1}:
+ [1, 1]
+ [2, 2]
+ [3, 3]
+
+julia> a = [1 2 3; 4 5 6]
+2×3 Array{Int64,2}:
+ 1  2  3
+ 4  5  6
+
+julia> cumsum(a; dims=1)
+2×3 Array{Int64,2}:
+ 1  2  3
+ 5  7  9
+
+julia> cumsum(a; dims=2)
+2×3 Array{Int64,2}:
+ 1  3   6
+ 4  9  15
+```
+"""
+function cumsum(A; dims::Union{Nothing,Integer}=nothing)
+    if A isa AbstractArray && ndims(A) > 1 && dims === nothing
+        depwarn("`cumsum(A::AbstractArray)` is deprecated, use `cumsum(A; dims=1)` instead.", :cumsum)
+        dims = 1
+    end
+    accumulate(add_sum, A; dims=dims)
+end
+
+function cumsum(v::AbstractVector{T}) where T
+    # we dispatch on the possibility of numerical accuracy issues
+    _cumsum(v, ArithmeticStyle(T))
+end
+_cumsum(v::AbstractVector, ::ArithmeticRounds)  = accumulate_pairwise(add_sum, v)
+_cumsum(v::AbstractVector, ::ArithmeticUnknown) = accumulate_pairwise(add_sum, v)
+_cumsum(v::AbstractVector, ::ArithmeticStyle)   = accumulate(add_sum, v)
+
+
+"""
+    Base.ConvertOp{T}(op)(x,y)
+
+An operator which converts `x` and `y` to type `T` before performing the `op`.
+
+The main purpose is for use in [`cumsum!`](@ref) and [`cumprod!`](@ref), where `T` is determined by the output array.
+"""
+struct ConvertOp{T,O} <: Function
+    op::O
+end
+ConvertOp{T}(op::O) where {T,O} = ConvertOp{T,O}(op)
+(c::ConvertOp{T})(x,y) where {T} = c.op(convert(T,x),convert(T,y))
+reduce_first(c::ConvertOp{T},x) where {T} = reduce_first(c.op, convert(T,x))
+
+
+
+
+"""
+    cumsum!(B, A; dims::Integer)
+
+Cumulative sum of `A` along the dimension `dims`, storing the result in `B`. `dims` is
+optional for 1-dimensional objects.
+
+Elements of `A` are first converted to the element type of `B` before addition, so as to
+improve accuracy and reduce occurences of arithemtic overflow.
+
+See also [`cumsum`](@ref).
+"""
+cumsum!(dest::AbstractArray{T}, A; dims::Union{Nothing,Integer}=nothing) where {T} =
+    accumulate!(ConvertOp{T}(+), dest, A; dims=dims)
+
+function cumsum!(out::AbstractVector, v::AbstractVector{T}) where T
+    # we dispatch on the possibility of numerical accuracy issues
+    cumsum!(out, v, ArithmeticStyle(T))
+end
+cumsum!(out::AbstractVector{T}, v::AbstractVector, ::ArithmeticRounds) where {T} =
+    accumulate_pairwise!(ConvertOp{T}(+), out, v)
+cumsum!(out::AbstractVector{T}, v::AbstractVector, ::ArithmeticUnknown) where {T} =
+    accumulate_pairwise!(ConvertOp{T}(+), out, v)
+cumsum!(out::AbstractVector{T}, v::AbstractVector, ::ArithmeticStyle) where {T} =
+    accumulate!(ConvertOp{T}(+), out, v)
+
+
+"""
+    cumprod(A; dims::Integer)
+
+Cumulative product along the dimension `dims` (`dims` is optional for 1-dimensional objects).
+
+See also:
+ - [`cumprod!`](@ref) to use a preallocated output array, both for performance and
+to control the precision of the output (e.g. to avoid overflow).
+
+# Examples
+```jldoctest
+julia> cumprod(fill(1//2, 3))
+3-element Array{Rational{Int64},1}:
+ 1//2
+ 1//4
+ 1//8
+
+julia> cumprod([fill(1//3, 2, 2) for i in 1:3])
+3-element Array{Array{Rational{Int64},2},1}:
+ [1//3 1//3; 1//3 1//3]
+ [2//9 2//9; 2//9 2//9]
+ [4//27 4//27; 4//27 4//27]
+
+julia> a = [1 2 3; 4 5 6]
+2×3 Array{Int64,2}:
+ 1  2  3
+ 4  5  6
+
+julia> cumprod(a; dims=1)
+2×3 Array{Int64,2}:
+ 1   2   3
+ 4  10  18
+
+julia> cumprod(a; dims=2)
+2×3 Array{Int64,2}:
+ 1   2    6
+ 4  20  120
+```
+"""
+function cumprod(A; dims::Union{Nothing,Integer}=nothing)
+    if A isa AbstractArray && ndims(A) > 1 && dims === nothing
+        depwarn("`cumprod(A::AbstractArray)` is deprecated, use `cumprod(A; dims=1)` instead.", :cumprod)
+        dims = 1
+    end
+    accumulate(mul_prod, A; dims=dims)
+end
+
+"""
+    cumprod!(B, A; dims::Integer)
+
+Cumulative product of `A` along the dimension `dims`, storing the result in `B`. `dims` is
+optional for 1-dimensional objects.
+
+Elements of `A` are first converted to the element type of `B` before multiplication, so
+as to improve accuracy and reduce occurences of arithemtic overflow.
+
+See also [`cumprod`](@ref).
+"""
+cumprod!(dest::AbstractArray{T}, A; dims::Union{Nothing,Integer}=nothing) where {T} =
+    accumulate!(ConvertOp{T}(*), dest, A; dims=dims)
diff --git a/base/array.jl b/base/array.jl
index 0f3297f0143e6..8dfb5d47b8ec3 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -562,8 +562,9 @@ function _collect(c, itr, ::EltypeUnknown, isz::Union{HasLength,HasShape})
     if done(itr,st)
         return _similar_for(c, @default_eltype(itr), itr, isz)
     end
-    v1, st = next(itr, st)
-    collect_to_with_first!(_similar_for(c, typeof(v1), itr, isz), v1, itr, st)
+    v1, st2 = next(itr, st)
+    # change name of state variable so that Accumulate is type stable
+    collect_to_with_first!(_similar_for(c, typeof(v1), itr, isz), v1, itr, st2)
 end
 
 function collect_to_with_first!(dest::AbstractArray, v1, itr, st)
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 45f97600af000..4385fa057ba8a 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -678,405 +678,6 @@ _iterable(v) = Iterators.repeated(v)
     end
 end
 
-# see discussion in #18364 ... we try not to widen type of the resulting array
-# from cumsum or cumprod, but in some cases (+, Bool) we may not have a choice.
-rcum_promote_type(op, ::Type{T}, ::Type{S}) where {T,S<:Number} = promote_op(op, T, S)
-rcum_promote_type(op, ::Type{T}) where {T<:Number} = rcum_promote_type(op, T,T)
-rcum_promote_type(op, ::Type{T}) where {T} = T
-
-# handle sums of Vector{Bool} and similar.   it would be nice to handle
-# any AbstractArray here, but it's not clear how that would be possible
-rcum_promote_type(op, ::Type{Array{T,N}}) where {T,N} = Array{rcum_promote_type(op,T), N}
-
-# accumulate_pairwise slightly slower then accumulate, but more numerically
-# stable in certain situations (e.g. sums).
-# it does double the number of operations compared to accumulate,
-# though for cheap operations like + this does not have much impact (20%)
-function _accumulate_pairwise!(op::Op, c::AbstractVector{T}, v::AbstractVector, s, i1, n)::T where {T,Op}
-    @inbounds if n < 128
-        s_ = v[i1]
-        c[i1] = op(s, s_)
-        for i = i1+1:i1+n-1
-            s_ = op(s_, v[i])
-            c[i] = op(s, s_)
-        end
-    else
-        n2 = n >> 1
-        s_ = _accumulate_pairwise!(op, c, v, s, i1, n2)
-        s_ = op(s_, _accumulate_pairwise!(op, c, v, op(s, s_), i1+n2, n-n2))
-    end
-    return s_
-end
-
-function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector) where Op
-    li = linearindices(v)
-    li != linearindices(result) && throw(DimensionMismatch("input and output array sizes and indices must match"))
-    n = length(li)
-    n == 0 && return result
-    i1 = first(li)
-    @inbounds result[i1] = v1 = v[i1]
-    n == 1 && return result
-    _accumulate_pairwise!(op, result, v, v1, i1+1, n-1)
-    return result
-end
-
-function accumulate_pairwise(op, v::AbstractVector{T}) where T
-    out = similar(v, rcum_promote_type(op, T))
-    return accumulate_pairwise!(op, out, v)
-end
-
-function cumsum!(out, v::AbstractVector; dims::Integer=1)
-    # we dispatch on the possibility of numerical stability issues
-    _cumsum!(out, v, dims, ArithmeticStyle(eltype(out)))
-end
-
-function _cumsum!(out, v, dim, ::ArithmeticRounds)
-    dim == 1 ? accumulate_pairwise!(+, out, v) : copyto!(out, v)
-end
-function _cumsum!(out, v, dim, ::ArithmeticUnknown)
-    _cumsum!(out, v, dim, ArithmeticRounds())
-end
-function _cumsum!(out, v, dim, ::ArithmeticStyle)
-    dim == 1 ? accumulate!(+, out, v) : copyto!(out, v)
-end
-
-"""
-    cumsum(A; dims::Integer)
-
-Cumulative sum along the dimension `dims`. See also [`cumsum!`](@ref)
-to use a preallocated output array, both for performance and to control the precision of the
-output (e.g. to avoid overflow).
-
-# Examples
-```jldoctest
-julia> a = [1 2 3; 4 5 6]
-2×3 Array{Int64,2}:
- 1  2  3
- 4  5  6
-
-julia> cumsum(a, dims=1)
-2×3 Array{Int64,2}:
- 1  2  3
- 5  7  9
-
-julia> cumsum(a, dims=2)
-2×3 Array{Int64,2}:
- 1  3   6
- 4  9  15
-```
-"""
-function cumsum(A::AbstractArray{T}; dims::Union{Nothing,Integer}=nothing) where T
-    if dims === nothing
-        depwarn("`cumsum(A::AbstractArray)` is deprecated, use `cumsum(A, dims=1)` instead.", :cumsum)
-        dims = 1
-    end
-    out = similar(A, rcum_promote_type(+, T))
-    cumsum!(out, A, dims=dims)
-end
-
-"""
-    cumsum(x::AbstractVector)
-
-Cumulative sum a vector. See also [`cumsum!`](@ref)
-to use a preallocated output array, both for performance and to control the precision of the
-output (e.g. to avoid overflow).
-
-# Examples
-```jldoctest
-julia> cumsum([1, 1, 1])
-3-element Array{Int64,1}:
- 1
- 2
- 3
-
-julia> cumsum([fill(1, 2) for i in 1:3])
-3-element Array{Array{Int64,1},1}:
- [1, 1]
- [2, 2]
- [3, 3]
-```
-"""
-cumsum(x::AbstractVector) = cumsum(x, dims=1)
-
-"""
-    cumsum!(B, A; dims::Integer)
-
-Cumulative sum of `A` along the dimension `dims`, storing the result in `B`. See also [`cumsum`](@ref).
-"""
-cumsum!(B, A; dims::Integer) = accumulate!(+, B, A, dims=dims)
-
-"""
-    cumprod(A; dims::Integer)
-
-Cumulative product along the dimension `dim`. See also
-[`cumprod!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow).
-
-# Examples
-```jldoctest
-julia> a = [1 2 3; 4 5 6]
-2×3 Array{Int64,2}:
- 1  2  3
- 4  5  6
-
-julia> cumprod(a, dims=1)
-2×3 Array{Int64,2}:
- 1   2   3
- 4  10  18
-
-julia> cumprod(a, dims=2)
-2×3 Array{Int64,2}:
- 1   2    6
- 4  20  120
-```
-"""
-function cumprod(A::AbstractArray; dims::Union{Nothing,Integer}=nothing)
-    if dims === nothing
-        depwarn("`cumprod(A::AbstractArray)` is deprecated, use `cumprod(A, dims=1)` instead.", :cumprod)
-        dims = 1
-    end
-    return accumulate(*, A, dims=dims)
-end
-
-"""
-    cumprod(x::AbstractVector)
-
-Cumulative product of a vector. See also
-[`cumprod!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow).
-
-# Examples
-```jldoctest
-julia> cumprod(fill(1//2, 3))
-3-element Array{Rational{Int64},1}:
- 1//2
- 1//4
- 1//8
-
-julia> cumprod([fill(1//3, 2, 2) for i in 1:3])
-3-element Array{Array{Rational{Int64},2},1}:
- [1//3 1//3; 1//3 1//3]
- [2//9 2//9; 2//9 2//9]
- [4//27 4//27; 4//27 4//27]
-```
-"""
-cumprod(x::AbstractVector) = cumprod(x, dims=1)
-
-"""
-    cumprod!(B, A; dims::Integer)
-
-Cumulative product of `A` along the dimension `dims`, storing the result in `B`.
-See also [`cumprod`](@ref).
-"""
-cumprod!(B, A; dims::Integer) = accumulate!(*, B, A, dims=dims)
-
-"""
-    cumprod!(y::AbstractVector, x::AbstractVector)
-
-Cumulative product of a vector `x`, storing the result in `y`.
-See also [`cumprod`](@ref).
-"""
-cumprod!(y::AbstractVector, x::AbstractVector) = cumprod!(y, x, dims=1)
-
-"""
-    accumulate(op, A; dims::Integer)
-
-Cumulative operation `op` along the dimension `dims`. See also
-[`accumulate!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow). For common operations
-there are specialized variants of `accumulate`, see:
-[`cumsum`](@ref), [`cumprod`](@ref)
-
-# Examples
-```jldoctest
-julia> accumulate(+, fill(1, 3, 3), dims=1)
-3×3 Array{Int64,2}:
- 1  1  1
- 2  2  2
- 3  3  3
-
-julia> accumulate(+, fill(1, 3, 3), dims=2)
-3×3 Array{Int64,2}:
- 1  2  3
- 1  2  3
- 1  2  3
-```
-"""
-function accumulate(op, A; dims::Integer)
-    out = similar(A, rcum_promote_type(op, eltype(A)))
-    accumulate!(op, out, A, dims=dims)
-end
-
-"""
-    accumulate(op, x::AbstractVector)
-
-Cumulative operation `op` on a vector. See also
-[`accumulate!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow). For common operations
-there are specialized variants of `accumulate`, see:
-[`cumsum`](@ref), [`cumprod`](@ref)
-
-# Examples
-```jldoctest
-julia> accumulate(+, [1,2,3])
-3-element Array{Int64,1}:
- 1
- 3
- 6
-
-julia> accumulate(*, [1,2,3])
-3-element Array{Int64,1}:
- 1
- 2
- 6
-```
-"""
-accumulate(op, x::AbstractVector) = accumulate(op, x, dims=1)
-
-"""
-    accumulate!(op, B, A; dims::Integer)
-
-Cumulative operation `op` on `A` along the dimension `dims`, storing the result in `B`.
-See also [`accumulate`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1 2; 3 4];
-
-julia> B = [0 0; 0 0];
-
-julia> accumulate!(-, B, A, dims=1);
-
-julia> B
-2×2 Array{Int64,2}:
-  1   2
- -2  -2
-
-julia> accumulate!(-, B, A, dims=2);
-
-julia> B
-2×2 Array{Int64,2}:
- 1  -1
- 3  -1
-```
-"""
-function accumulate!(op, B, A; dims::Integer)
-    dim = dims
-    dim > 0 || throw(ArgumentError("dim must be a positive integer"))
-    inds_t = axes(A)
-    axes(B) == inds_t || throw(DimensionMismatch("shape of B must match A"))
-    dim > ndims(A) && return copyto!(B, A)
-    isempty(inds_t[dim]) && return B
-    if dim == 1
-        # We can accumulate to a temporary variable, which allows
-        # register usage and will be slightly faster
-        ind1 = inds_t[1]
-        @inbounds for I in CartesianIndices(tail(inds_t))
-            tmp = convert(eltype(B), A[first(ind1), I])
-            B[first(ind1), I] = tmp
-            for i_1 = first(ind1)+1:last(ind1)
-                tmp = op(tmp, A[i_1, I])
-                B[i_1, I] = tmp
-            end
-        end
-    else
-        R1 = CartesianIndices(axes(A)[1:dim-1])   # not type-stable
-        R2 = CartesianIndices(axes(A)[dim+1:end])
-        _accumulate!(op, B, A, R1, inds_t[dim], R2) # use function barrier
-    end
-    return B
-end
-
-"""
-    accumulate!(op, y, x::AbstractVector)
-
-Cumulative operation `op` on a vector `x`, storing the result in `y`.
-See also [`accumulate`](@ref).
-
-# Examples
-``jldoctest
-julia> x = [1, 0, 2, 0, 3];
-
-julia> y = [0, 0, 0, 0, 0];
-
-julia> accumulate!(+, y, x);
-
-julia> y
-5-element Array{Int64,1}:
- 1
- 1
- 3
- 3
- 6
-```
-"""
-function accumulate!(op::Op, y, x::AbstractVector) where Op
-    isempty(x) && return y
-    v1 = first(x)
-    _accumulate1!(op, y, v1, x, 1)
-end
-
-@noinline function _accumulate!(op, B, A, R1, ind, R2)
-    # Copy the initial element in each 1d vector along dimension `dim`
-    ii = first(ind)
-    @inbounds for J in R2, I in R1
-        B[I, ii, J] = A[I, ii, J]
-    end
-    # Accumulate
-    @inbounds for J in R2, i in first(ind)+1:last(ind), I in R1
-        B[I, i, J] = op(B[I, i-1, J], A[I, i, J])
-    end
-    B
-end
-
-"""
-    accumulate(op, v0, x::AbstractVector)
-
-Like `accumulate`, but using a starting element `v0`. The first entry of the result will be
-`op(v0, first(A))`.
-
-# Examples
-```jldoctest
-julia> accumulate(+, 100, [1,2,3])
-3-element Array{Int64,1}:
- 101
- 103
- 106
-
-julia> accumulate(min, 0, [1,2,-1])
-3-element Array{Int64,1}:
-  0
-  0
- -1
-```
-"""
-function accumulate(op, v0, x::AbstractVector)
-    T = rcum_promote_type(op, typeof(v0), eltype(x))
-    out = similar(x, T)
-    accumulate!(op, out, v0, x)
-end
-
-function accumulate!(op, y, v0, x::AbstractVector)
-    isempty(x) && return y
-    v1 = op(v0, first(x))
-    _accumulate1!(op, y, v1, x, 1)
-end
-
-function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
-    dim > 0 || throw(ArgumentError("dim must be a positive integer"))
-    inds = linearindices(A)
-    inds == linearindices(B) || throw(DimensionMismatch("linearindices of A and B don't match"))
-    dim > 1 && return copyto!(B, A)
-    i1 = inds[1]
-    cur_val = v1
-    B[i1] = cur_val
-    @inbounds for i in inds[2:end]
-        cur_val = op(cur_val, A[i])
-        B[i] = cur_val
-    end
-    return B
-end
-
 diff(a::AbstractVector) = [ a[i+1] - a[i] for i=1:length(a)-1 ]
 
 """
diff --git a/base/reduce.jl b/base/reduce.jl
index 195c0adc85009..07d91ad6cfc6e 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -21,6 +21,13 @@ integers are promoted to `Int`/`UInt`.
 add_sum(x,y) = x + y
 add_sum(x::SmallSigned,y::SmallSigned) = Int(x) + Int(y)
 add_sum(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) + UInt(y)
+add_sum(X::AbstractArray,Y::AbstractArray) = (promote_shape(X,Y); broadcast(add_sum, X, Y))
+
+add_sum(x) = +(x)
+add_sum(x::SmallSigned) = Int(x)
+add_sum(x::SmallUnsigned) = UInt(x)
+add_sum(X::AbstractArray) = broadcast(add_sum, X)
+
 
 """
     Base.mul_prod(x,y)
@@ -32,6 +39,10 @@ mul_prod(x,y) = x * y
 mul_prod(x::SmallSigned,y::SmallSigned) = Int(x) * Int(y)
 mul_prod(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) * UInt(y)
 
+mul_prod(x) = *(x)
+mul_prod(x::SmallSigned) = Int(x)
+mul_prod(x::SmallUnsigned) = UInt(x)
+
 ## foldl && mapfoldl
 
 @noinline function mapfoldl_impl(f, op, v0, itr, i)
@@ -301,20 +312,22 @@ The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref`) or
 `x`. This value may also used to initialise the recursion, so that `reduce(op, [x, y])`
 may call `op(reduce_first(op, x), y)`.
 
-The default is `x` for most types. The main purpose is to ensure type stability, so
+The default is `x` for most operators. The main purpose is to ensure type stability, so
 additional methods should only be defined for cases where `op` gives a result with
 different types than its inputs.
 """
 reduce_first(op, x) = x
-reduce_first(::typeof(+), x::Bool) = Int(x)
-reduce_first(::typeof(*), x::AbstractChar) = string(x)
-
-reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
-reduce_first(::typeof(add_sum), x::SmallSigned)   = Int(x)
-reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x)
-reduce_first(::typeof(mul_prod), x) = reduce_first(*, x)
-reduce_first(::typeof(mul_prod), x::SmallSigned)   = Int(x)
-reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x)
+
+
+# for existing operators we can use unary forms which already handle promotion correctly
+reduce_first(::typeof(+), x) = +(x)
+reduce_first(::typeof(*), x) = *(x)
+reduce_first(::typeof(add_sum), x) = add_sum(x)
+reduce_first(::typeof(mul_prod), x) = mul_prod(x)
+
+# called by accumulate, which uses `undef` as a sentinel value
+reduce_first(op, v0, x) = op(v0,x)
+reduce_first(op, ::UndefInitializer, x) = reduce_first(op, x)
 
 """
     Base.mapreduce_first(f, op, x)
diff --git a/base/sysimg.jl b/base/sysimg.jl
index f057ce998b121..5463a02edc39a 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -349,6 +349,7 @@ const (∛)=cbrt
 INCLUDE_STATE = 2 # include = _include (from lines above)
 
 # reduction along dims
+include("accumulate.jl")
 include("reducedim.jl")  # macros in this file relies on string.jl
 
 # basic data structures
diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index bbf0797a0e8c3..5d1196bcab9c9 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -2306,7 +2306,7 @@ function getindex(A::SparseMatrixCSC{Tv,Ti}, I::AbstractArray) where {Tv,Ti}
             end
         end
     end
-    colptrB = cumsum(colptrB)
+    colptrB = accumulate(+,colptrB) # cumsum would promote incorrectly
     if n > (idxB-1)
         deleteat!(nzvalB, idxB:n)
         deleteat!(rowvalB, idxB:n)
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 26045e4371aa6..8600e87c9d8d0 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -2054,9 +2054,10 @@ end
 # issue #18363
 @test_throws DimensionMismatch cumsum!([0,0], 1:4)
 @test cumsum(Any[])::Vector{Any} == Any[]
-@test cumsum(Any[1, 2.3])::Vector{Any} == [1, 3.3] == cumsum(Real[1, 2.3])::Vector{Real}
+@test cumsum(Any[1, 2.3]) == [1, 3.3] == cumsum(Real[1, 2.3])::Vector{Real}
 @test cumsum([true,true,true]) == [1,2,3]
-@test cumsum(0x00:0xff)[end] === 0x80 # overflow
+@test cumsum(0x00:0xff)[end] === UInt(255*(255+1)÷2) # no overflow
+@test accumulate(+, 0x00:0xff)[end] === 0x80         # overflow
 @test cumsum([[true], [true], [false]])::Vector{Vector{Int}} == [[1], [2], [2]]
 
 #issue #18336
@@ -2281,6 +2282,8 @@ end
 
 Base.ArithmeticStyle(::Type{F21666{T}}) where {T} = T()
 Base.:+(x::F, y::F) where {F <: F21666} = F(x.x + y.x)
+Base.:+(x::F) where {F <: F21666} = x
+
 Float64(x::F21666) = Float64(x.x)
 @testset "Exactness of cumsum # 21666" begin
     # test that cumsum uses more stable algorithm
@@ -2410,3 +2413,25 @@ Base.view(::T25958, args...) = args
     @test t[end,end,1]   == @view(t[end,end,1])   == @views t[end,end,1]
     @test t[end,end,end] == @view(t[end,end,end]) == @views t[end,end,end]
 end
+
+@testset "accumulate on non-numeric types" begin
+    @test accumulate((acc, x) -> acc+x[1], 0, [(1,2), (3,4), (5,6)]) == [1, 4, 9]
+    @test accumulate(*, ['a', 'b']) == ["a", "ab"]
+    @inferred accumulate(*, String[])
+    @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=1) == ["a" "b"; "ac" "bd"]
+    @test accumulate(*, ['a' 'b'; 'c' 'd'], dims=2) == ["a" "ab"; "c" "cd"]
+end
+
+@testset "accumulate promotion" begin
+    U = cumsum(Any[1,1//1,1f0,1.0,big(1.0)])
+    @test U[1] == 1
+    @test U[1] isa Int
+    @test U[2] == 2
+    @test U[2] isa Rational{Int}
+    @test U[3] == 3
+    @test U[3] isa Float32
+    @test U[4] == 4
+    @test U[4] isa Float64
+    @test U[5] == 5
+    @test U[5] isa BigFloat
+end
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index afca91b3668dd..43d21d62a8b24 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -345,8 +345,10 @@ C = similar(A)
 cumsum!(C, A, dims=1)
 @test parent(C) == cumsum(parent(A), dims=1)
 @test parent(cumsum(A, dims=1)) == cumsum(parent(A), dims=1)
+@test C == cumsum(A, dims=1)
 cumsum!(C, A, dims=2)
 @test parent(C) == cumsum(parent(A), dims=2)
+@test C == cumsum(A, dims=2)
 R = similar(A, (1:1, 6:9))
 maximum!(R, A)
 @test parent(R) == maximum(parent(A), dims=1)