Skip to content

Commit

Permalink
Merge pull request #20921 from JuliaLang/sjk/quantile
Browse files Browse the repository at this point in the history
Allow quantile function to accept tuples
  • Loading branch information
simonster authored May 2, 2017
2 parents b6e4e70 + 8c00a68 commit 970a742
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 25 deletions.
52 changes: 27 additions & 25 deletions base/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -623,10 +623,11 @@ median(v::AbstractArray, region) = mapslices(median!, v, region)
"""
quantile!([q, ] v, p; sorted=false)
Compute the quantile(s) of a vector `v` at the probabilities `p`, with optional output into
array `q` (if not provided, a new output array is created). The keyword argument `sorted`
indicates whether `v` can be assumed to be sorted; if `false` (the default), then the
elements of `v` may be partially sorted.
Compute the quantile(s) of a vector `v` at the probability or probabilities `p`, which
can be given as a single value, a vector, or a tuple. If `p` is a vector, an optional
output array `q` may also be specified. (If not provided, a new output array is created.)
The keyword argument `sorted` indicates whether `v` can be assumed to be sorted; if
`false` (the default), then the elements of `v` may be partially sorted.
The elements of `p` should be on the interval [0,1], and `v` should not have any `NaN`
values.
Expand All @@ -648,19 +649,10 @@ function quantile!(q::AbstractArray, v::AbstractVector, p::AbstractArray;
if size(p) != size(q)
throw(DimensionMismatch("size of p, $(size(p)), must equal size of q, $(size(q))"))
end
isempty(q) && return q

isempty(v) && throw(ArgumentError("empty data vector"))

lv = length(v)
if !sorted
minp, maxp = extrema(p)
lo = floor(Int,1+minp*(lv-1))
hi = ceil(Int,1+maxp*(lv-1))

# only need to perform partial sort
sort!(v, 1, lv, PartialQuickSort(lo:hi), Base.Sort.Forward)
end
isnan(v[end]) && throw(ArgumentError("quantiles are undefined in presence of NaNs"))
minp, maxp = extrema(p)
_quantilesort!(v, sorted, minp, maxp)

for (i, j) in zip(eachindex(p), eachindex(q))
@inbounds q[j] = _quantile(v,p[i])
Expand All @@ -671,21 +663,30 @@ end
quantile!(v::AbstractVector, p::AbstractArray; sorted::Bool=false) =
quantile!(similar(p,float(eltype(v))), v, p; sorted=sorted)

function quantile!(v::AbstractVector, p::Real;
sorted::Bool=false)
quantile!(v::AbstractVector, p::Real; sorted::Bool=false) =
_quantile(_quantilesort!(v, sorted, p, p), p)

function quantile!(v::AbstractVector, p::Tuple{Vararg{Real}}; sorted::Bool=false)
isempty(p) && return ()
minp, maxp = extrema(p)
_quantilesort!(v, sorted, minp, maxp)
return map(x->_quantile(v, x), p)
end

# Function to perform partial sort of v for quantiles in given range
function _quantilesort!(v::AbstractArray, sorted::Bool, minp::Real, maxp::Real)
isempty(v) && throw(ArgumentError("empty data vector"))

lv = length(v)
if !sorted
lo = floor(Int,1+p*(lv-1))
hi = ceil(Int,1+p*(lv-1))
lv = length(v)
lo = floor(Int,1+minp*(lv-1))
hi = ceil(Int,1+maxp*(lv-1))

# only need to perform partial sort
sort!(v, 1, lv, PartialQuickSort(lo:hi), Base.Sort.Forward)
end
isnan(v[end]) && throw(ArgumentError("quantiles are undefined in presence of NaNs"))

return _quantile(v,p)
return v
end

# Core quantile lookup function: assumes `v` sorted
Expand Down Expand Up @@ -717,8 +718,9 @@ end
"""
quantile(v, p; sorted=false)
Compute the quantile(s) of a vector `v` at a specified probability or vector `p`. The
keyword argument `sorted` indicates whether `v` can be assumed to be sorted.
Compute the quantile(s) of a vector `v` at a specified probability or vector or tuple of
probabilities `p`. The keyword argument `sorted` indicates whether `v` can be assumed to
be sorted.
The `p` should be on the interval [0,1], and `v` should not have any `NaN` values.
Expand Down
4 changes: 4 additions & 0 deletions test/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ end
@test quantile([Inf,Inf],0.5) == Inf
@test quantile([-Inf,1],0.5) == -Inf
@test quantile([0,1],1e-18) == 1e-18
@test quantile([1, 2, 3, 4],[]) == []
@test quantile([1, 2, 3, 4], (0.5,)) == (2.5,)
@test quantile([4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11], (0.1, 0.2, 0.4, 0.9)) == (2.0, 3.0, 5.0, 11.0)
@test quantile([1, 2, 3, 4], ()) == ()

# StatsBase issue 164
y = [0.40003674665581906,0.4085630862624367,0.41662034698690303,0.41662034698690303,0.42189053966652057,0.42189053966652057,0.42553514344518345,0.43985732442991354]
Expand Down

2 comments on commit 970a742

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @jrevels

Please sign in to comment.