Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mapreduce_single function #25051

Merged
merged 1 commit into from
Jan 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 97 additions & 35 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,25 @@ else
const SmallUnsigned = Union{UInt8,UInt16,UInt32}
end

# Certain reductions like sum and prod may wish to promote the items being reduced over to
# an appropriate size. Note we need x + zero(x) because some types like Bool have their sum
# lie in a larger type.
promote_sys_size(T::Type) = T
promote_sys_size(::Type{<:SmallSigned}) = Int
promote_sys_size(::Type{<:SmallUnsigned}) = UInt

promote_sys_size_add(x) = convert(promote_sys_size(typeof(x + zero(x))), x)
promote_sys_size_mul(x) = convert(promote_sys_size(typeof(x * one(x))), x)
const _PromoteSysSizeFunction = Union{typeof(promote_sys_size_add),
typeof(promote_sys_size_mul)}
"""
Base.add_sum(x,y)

The reduction operator used in `sum`. The main difference from [`+`](@ref) is that small
integers are promoted to `Int`/`UInt`.
"""
add_sum(x,y) = x + y
add_sum(x::SmallSigned,y::SmallSigned) = Int(x) + Int(y)
add_sum(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) + UInt(y)

"""
Base.mul_prod(x,y)

The reduction operator used in `prod`. The main difference from [`*`](@ref) is that small
integers are promoted to `Int`/`UInt`.
"""
mul_prod(x,y) = x * y
mul_prod(x::SmallSigned,y::SmallSigned) = Int(x) * Int(y)
mul_prod(x::SmallUnsigned,y::SmallUnsigned) = UInt(x) * UInt(y)

## foldl && mapfoldl

Expand Down Expand Up @@ -64,7 +72,7 @@ function mapfoldl(f, op, itr)
return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
end
(x, i) = next(itr, i)
v0 = f(x)
v0 = mapreduce_first(f, op, x)
mapfoldl_impl(f, op, v0, itr, i)
end

Expand Down Expand Up @@ -133,7 +141,7 @@ function mapfoldr(f, op, itr)
if isempty(itr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated comment, which can be done as part of this PR or in a later one: Since we have #24187 Iterators.Reverse now, I think we can write these mapfoldr functions generically. As it stands it only works with arrays, and this generalization may allow it to work with arbitrary reversible iterators.

return Base.mapreduce_empty_iter(f, op, itr, iteratoreltype(itr))
end
return mapfoldr_impl(f, op, f(itr[i]), itr, i-1)
return mapfoldr_impl(f, op, mapreduce_first(f, op, itr[i]), itr, i-1)
end

"""
Expand Down Expand Up @@ -174,7 +182,7 @@ foldr(op, itr) = mapfoldr(identity, op, itr)
@noinline function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int)
if ifirst == ilast
@inbounds a1 = A[ifirst]
return f(a1)
return mapreduce_first(f, op, a1)
elseif ifirst + blksize > ilast
# sequential portion
@inbounds a1 = A[ifirst]
Expand Down Expand Up @@ -238,34 +246,88 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096

# handling empty arrays
_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))

"""
Base.reduce_empty(op, T)

The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or [`foldr`](@ref)
with reduction `op` over an empty array with element type of `T`.

If not defined, this will throw an `ArgumentError`.
"""
reduce_empty(op, T) = _empty_reduce_error()
reduce_empty(::typeof(+), T) = zero(T)
reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
reduce_empty(::typeof(*), T) = one(T)
reduce_empty(::typeof(*), ::Type{Char}) = ""
reduce_empty(::typeof(&), ::Type{Bool}) = true
reduce_empty(::typeof(|), ::Type{Bool}) = false

reduce_empty(::typeof(add_sum), T) = reduce_empty(+, T)
reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned} = zero(Int)
reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
reduce_empty(::typeof(mul_prod), T) = reduce_empty(*, T)
reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned} = one(Int)
reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)

"""
Base.mapreduce_empty(f, op, T)

The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
[`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
of `T`.

If not defined, this will throw an `ArgumentError`.
"""
mapreduce_empty(f, op, T) = _empty_reduce_error()
mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
mapreduce_empty(f::_PromoteSysSizeFunction, op, T) =
f(mapreduce_empty(identity, op, T))
mapreduce_empty(::typeof(abs), ::typeof(+), T) = abs(zero(T))
mapreduce_empty(::typeof(abs2), ::typeof(+), T) = abs2(zero(T))
mapreduce_empty(::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) =
abs(zero(T))
mapreduce_empty(::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) =
abs2(zero(T))

# Allow mapreduce_empty to “see through” promote_sys_size
let ComposedFunction = typename(typeof(identity ∘ identity)).wrapper
global mapreduce_empty(f::ComposedFunction{<:_PromoteSysSizeFunction}, op, T) =
f.f(mapreduce_empty(f.g, op, T))
end
mapreduce_empty(::typeof(abs), op, T) = abs(reduce_empty(op, T))
mapreduce_empty(::typeof(abs2), op, T) = abs2(reduce_empty(op, T))

mapreduce_empty(f::typeof(abs), ::Union{typeof(scalarmax), typeof(max)}, T) = abs(zero(T))
mapreduce_empty(f::typeof(abs2), ::Union{typeof(scalarmax), typeof(max)}, T) = abs2(zero(T))

mapreduce_empty_iter(f, op, itr, ::HasEltype) = mapreduce_empty(f, op, eltype(itr))
mapreduce_empty_iter(f, op::typeof(&), itr, ::EltypeUnknown) = true
mapreduce_empty_iter(f, op::typeof(|), itr, ::EltypeUnknown) = false
mapreduce_empty_iter(f, op, itr, ::EltypeUnknown) = _empty_reduce_error()

# handling of single-element iterators
"""
Base.reduce_first(op, x)

The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref`) or
[`foldr`](@ref) with reduction `op` over an iterator which contains a single element
`x`. This value may also used to initialise the recursion, so that `reduce(op, [x, y])`
may call `op(reduce_first(op, x), y)`.

The default is `x` for most types. The main purpose is to ensure type stability, so
additional methods should only be defined for cases where `op` gives a result with
different types than its inputs.
"""
reduce_first(op, x) = x
reduce_first(::typeof(+), x::Bool) = Int(x)
reduce_first(::typeof(*), x::Char) = string(x)

reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
reduce_first(::typeof(add_sum), x::SmallSigned) = Int(x)
reduce_first(::typeof(add_sum), x::SmallUnsigned) = UInt(x)
reduce_first(::typeof(mul_prod), x) = reduce_first(*, x)
reduce_first(::typeof(mul_prod), x::SmallSigned) = Int(x)
reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x)

"""
Base.mapreduce_first(f, op, x)

The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
[`mapfoldr`](@ref) with map `f` and reduction `op` over an iterator which contains a
single element `x`. This value may also used to initialise the recursion, so that
`mapreduce(f, op, [x, y])` may call `op(reduce_first(op, f, x), f(y))`.

The default is `reduce_first(op, f(x))`.
"""
mapreduce_first(f, op, x) = reduce_first(op, f(x))

_mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)

function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
Expand All @@ -275,7 +337,7 @@ function _mapreduce(f, op, ::IndexLinear, A::AbstractArray{T}) where T
return mapreduce_empty(f, op, T)
elseif n == 1
@inbounds a1 = A[inds[1]]
return f(a1)
return mapreduce_first(f, op, a1)
elseif n < 16 # process short array here, avoid mapreduce_impl() compilation
@inbounds i = inds[1]
@inbounds a1 = A[i]
Expand All @@ -294,7 +356,7 @@ end
_mapreduce(f, op, ::IndexCartesian, A::AbstractArray) = mapfoldl(f, op, A)

mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, IndexStyle(A), A)
mapreduce(f, op, a::Number) = f(a)
mapreduce(f, op, a::Number) = mapreduce_first(f, op, a)

"""
reduce(op, v0, itr)
Expand Down Expand Up @@ -372,7 +434,7 @@ In the former case, the integers are widened to system word size and therefore
the result is 128. In the latter case, no such widening happens and integer
overflow results in -128.
"""
sum(f::Callable, a) = mapreduce(promote_sys_size_add ∘ f, +, a)
sum(f, a) = mapreduce(f, add_sum, a)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch: it looks like this Callable is not necessary for avoiding ambiguity any more. The same change should apply for prod.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


"""
sum(itr)
Expand All @@ -388,7 +450,7 @@ julia> sum(1:20)
210
```
"""
sum(a) = mapreduce(promote_sys_size_add, +, a)
sum(a) = sum(identity, a)
sum(a::AbstractArray{Bool}) = count(a)

## prod
Expand All @@ -406,7 +468,7 @@ julia> prod(abs2, [2; 3; 4])
576
```
"""
prod(f::Callable, a) = mapreduce(promote_sys_size_mul ∘ f, *, a)
prod(f, a) = mapreduce(f, mul_prod, a)

"""
prod(itr)
Expand All @@ -422,7 +484,7 @@ julia> prod(1:20)
2432902008176640000
```
"""
prod(a) = mapreduce(promote_sys_size_mul, *, a)
prod(a) = mapreduce(identity, mul_prod, a)

## maximum & minimum

Copy link
Contributor

@TotalVerb TotalVerb Dec 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth changing the f(a1) in the below function to mapreduce_single(f, op, a1) for consistency, although no reasonable types I can think of should have a lattice structure outside the type itself.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

Expand All @@ -433,7 +495,7 @@ function mapreduce_impl(f, op::Union{typeof(scalarmax),
A::AbstractArray, first::Int, last::Int)
# locate the first non NaN number
@inbounds a1 = A[first]
v = f(a1)
v = mapreduce_first(f, op, a1)
i = first + 1
while (v == v) && (i <= last)
@inbounds ai = A[i]
Expand Down
37 changes: 11 additions & 26 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ end
###### Generic reduction functions #####

## initialization

for (Op, initfun) in ((:(typeof(+)), :zero), (:(typeof(*)), :one))
# initarray! is only called by sum!, prod!, etc.
for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one))
@eval initarray!(a::AbstractArray{T}, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
end

Expand All @@ -75,6 +75,7 @@ for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false))
@eval initarray!(a::AbstractArray, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
end

# reducedim_initarray is called by
reducedim_initarray(A::AbstractArray, region, v0, ::Type{R}) where {R} = fill!(similar(A,R,reduced_indices(A,region)), v0)
reducedim_initarray(A::AbstractArray, region, v0::T) where {T} = reducedim_initarray(A, region, v0, T)

Expand Down Expand Up @@ -104,10 +105,10 @@ reducedim_initarray0_empty(A::AbstractArray, region,::typeof(identity), ops) = m
promote_union(T::Union) = promote_type(promote_union(T.a), promote_union(T.b))
promote_union(T) = T

function reducedim_init(f, op::typeof(+), A::AbstractArray, region)
function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::AbstractArray, region)
_reducedim_init(f, op, zero, sum, A, region)
end
function reducedim_init(f, op::typeof(*), A::AbstractArray, region)
function reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::AbstractArray, region)
_reducedim_init(f, op, one, prod, A, region)
end
function _reducedim_init(f, op, fv, fop, A, region)
Expand Down Expand Up @@ -143,19 +144,12 @@ let
[AbstractArray{t} for t in uniontypes(BitIntFloat)]...,
[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...}

global reducedim_init(f::typeof(identity), op::typeof(+), A::T, region) =
reducedim_initarray(A, region, zero(eltype(A)))
global reducedim_init(f::typeof(identity), op::typeof(*), A::T, region) =
reducedim_initarray(A, region, one(eltype(A)))
global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(+), A::T, region) =
reducedim_initarray(A, region, real(zero(eltype(A))))
global reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(*), A::T, region) =
reducedim_initarray(A, region, real(one(eltype(A))))
global reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region) =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's not safe to generalize this to f ∉ [identity, abs, abs2] because we need the reduction operator to be type stable over whatever is produced by the f used for mapreduce. Nevertheless, it's a nice simplification to have, but I think we will need to continue hardcoding the list of acceptable map functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't mapreduce_single(f, op, zero(eltype(A))) should handle that?

Copy link
Contributor

@TotalVerb TotalVerb Dec 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, for some choices of f, this assumes that mr_single(f, +, 0) + f(0), etc., will be of the same concrete type of mr_single(f, +, 0), which one can design f to make it fail (for example, in the case of *, if f return a dimensionful quantity, like 1m, which has different concrete type than 1m^2). This seems to be where _reducedim_init is more general and guards against. Of course it is also possible to thwart _reducedim_init with badly behaving f, so 😕.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although I'm not actually sure _reducedim_init does what it should in these type unstable cases, so maybe it's best to leave cleanup of _reducedim_init to later.

reducedim_initarray(A, region, mapreduce_first(f, op, zero(eltype(A))))
global reducedim_init(f, op::Union{typeof(*),typeof(mul_prod)}, A::T, region) =
reducedim_initarray(A, region, mapreduce_first(f, op, one(eltype(A))))
end

reducedim_init(f::Union{typeof(identity),typeof(abs),typeof(abs2)}, op::typeof(+), A::AbstractArray{Bool}, region) =
reducedim_initarray(A, region, 0)

## generic (map)reduction

has_fast_linear_indexing(a::AbstractArray) = false
Expand Down Expand Up @@ -610,26 +604,17 @@ julia> any!([1 1], A)
"""
any!(r, A)

for (fname, op) in [(:sum, :+), (:prod, :*),
for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
(:maximum, :scalarmax), (:minimum, :scalarmin),
(:all, :&), (:any, :|)]
function compose_promote_sys_size(x)
if fname === :sum
:(promote_sys_size_add ∘ $x)
elseif fname === :prod
:(promote_sys_size_mul ∘ $x)
else
x
end
end
fname! = Symbol(fname, '!')
@eval begin
$(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
mapreducedim!(f, $(op), initarray!(r, $(op), init, A), A)
$(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)

$(fname)(f::Function, A::AbstractArray, region) =
mapreducedim($(compose_promote_sys_size(:f)), $(op), A, region)
mapreducedim(f, $(op), A, region)
$(fname)(A::AbstractArray, region) = $(fname)(identity, A, region)
end
end
Expand Down
5 changes: 5 additions & 0 deletions test/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,8 @@ test18695(r) = sum( t^2 for t in r )
# test neutral element not picked incorrectly for &, |
@test @inferred(foldl(&, Int[1])) === 1
@test_throws ArgumentError foldl(&, Int[])

# prod on Chars
@test prod(Char[]) == ""
@test prod(Char['a']) == "a"
@test prod(Char['a','b']) == "ab"
7 changes: 4 additions & 3 deletions test/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,10 @@ for region in Any[-1, 0, (-1, 2), [0, 1], (1,-2,3), [0 1;
end

# check type of result
under_test = [UInt8, Int8, Int32, Int64, BigInt]
@testset "type of sum(::Array{$T}" for T in under_test
@testset "type of sum(::Array{$T}" for T in [UInt8, Int8, Int32, Int64, BigInt]
result = sum(T[1 2 3; 4 5 6; 7 8 9], 2)
@test result == hcat([6, 15, 24])
@test eltype(result) === typeof(Base.promote_sys_size_add(zero(T)))
@test eltype(result) === (T <: Base.SmallSigned ? Int :
T <: Base.SmallUnsigned ? UInt :
T)
end