Skip to content

Commit

Permalink
Merge pull request #127 from smishr/quantile
Browse files Browse the repository at this point in the history
Update quantile
  • Loading branch information
smishr authored Dec 8, 2022
2 parents 53fbecb + f8ab7dc commit d472207
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 35 deletions.
1 change: 1 addition & 0 deletions src/Survey.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module Survey

using DataFrames
using Statistics
import Statistics: quantile
using StatsBase
import StatsBase: mean,quantile
using CSV
Expand Down
4 changes: 2 additions & 2 deletions src/mean.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ end
Estimate the subpopulation mean of a variable `x`.
The calculations were done according to the book [Calibration Estimators in Survey Sampling](https://www.tandfonline.com/doi/abs/10.1080/01621459.1992.10475217)
by Jean-Claude Deville and Carl-Erik Sarndal.
The calculations were done according to the book [Model-Assisted Survey Sampling](https://link.springer.com/book/9780387406206)
by Carl-Erik Sarndal, Bengt Swensson, Jan Wretman, section 3.3 and Chap 10. Assumes popsize is known and subpopulation size is unknown.
```jldoctest
julia> using Survey;
Expand Down
66 changes: 40 additions & 26 deletions src/quantile.jl
Original file line number Diff line number Diff line change
@@ -1,39 +1,53 @@
"""
quantile(var, design, q)
Estimate quantiles for `SurveyDesign`s.
quantile(var, design, p; kwargs...)
Estimate quantiles for a complex survey.
Hyndman and Fan compiled a taxonomy of nine algorithms to estimate quantiles. These are implemented in Statistics.quantile, which this function calls.
The Julia, R and Python-numpy use the same defaults
# References:
- Hyndman, R.J and Fan, Y. (1996) ["Sample Quantiles in Statistical Packages"](https://www.amherst.edu/media/view/129116/original/Sample+Quantiles.pdf), The American Statistician, Vol. 50, No. 4, pp. 361-365.
- [Quantiles](https://en.m.wikipedia.org/wiki/Quantile) on wikipedia
- [Complex Surveys: a guide to analysis using R](https://r-survey.r-forge.r-project.org/svybook/), Section 2.4.1 and Appendix C.4.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> quantile(:enroll, srs, 0.5)
1×1 DataFrame
Row │ 0.5th percentile
│ Float64
─────┼──────────────────
1 │ 453.0
julia> quantile(:api00,srs,0.5)
1×2 DataFrame
Row │ probability quantile
│ Float64 Float64
─────┼───────────────────────
1 │ 0.5 659.0
julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
5×2 DataFrame
Row │ probability quantile
│ Float64 Float64
─────┼───────────────────────
1 │ 0.1 245.5
2 │ 0.2 317.6
3 │ 0.5 453.0
4 │ 0.75 668.5
5 │ 0.95 1473.1
```
"""
function quantile(var, design::SimpleRandomSample, q; kwargs...)
x = design.data[!, var]
df = DataFrame(tmp = Statistics.quantile(Float32.(x), q; kwargs...))
rename!(df, :tmp => Symbol(string(q) .* "th percentile"))
return df
end

function quantile(var, design::StratifiedSample, q)
x = design.data[!, var]
w = design.data.probs
df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q))
rename!(df, :tmp => Symbol(string(q) .* "th percentile"))
function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vector{<:Real}};
alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
v = design.data[!, var]
probs = design.data[!, :probs]
df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p))
# TODO: Add CI and SE of the quantile
return df
end

# Inner method for `by`
function quantile(x, w, _, q)
df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q))
rename!(df, :tmp => Symbol(string(q) .* "th percentile"))

function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}};
alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
v = design.data[!, var]
probs = design.data[!, :probs]
df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p)) # Not sure which quantile defintion this returns
# TODO: Add CI and SE of the quantile
return df
end
end
37 changes: 30 additions & 7 deletions test/quantile.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
@testset "quantile.jl" begin
# SimpleRandomSample
@testset "quantile_SimpleRandomSample" begin
##### SimpleRandomSample tests
# Load API datasets
apisrs_original = load_data("apisrs")
apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw
apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1))
##############################
apisrs = copy(apisrs_original)
srs_design = SimpleRandomSample(apisrs,popsize=:fpc)
@test quantile(:api00,srs_design,0.5)[!,2][1] 659.0 atol=1e-4
@test quantile(:api00,srs_design,[0.1753,0.25,0.5,0.75,0.975])[!,2] [512.8847,544,659,752.5,905] atol = 1e-4
@test quantile(:enroll,srs_design,[0.1,0.2,0.5,0.75,0.95])[!,2] [245.5,317.6,453.0,668.5,1473.1] atol = 1e-4
end

apisrs = copy(apisrs_original)
srs_new = SimpleRandomSample(apisrs; popsize=:fpc, ignorefpc=true)
# 0.5th percentile
# 0.25th percentile
@testset "quantile_Stratified" begin
##### StratifiedSample tests
# Load API datasets
apistrat_original = load_data("apistrat")
apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw
apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
# base functionality
apistrat = copy(apistrat_original)
dstrat = StratifiedSample(apistrat, :stype; popsize = :fpc)
# Check which definition of quantile for StratifiedSample
# @test quantile(:enroll,dstrat,[0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [262,309.3366,446.4103,658.8764,1589.7881] atol = 1e-4
end

# StratifiedSample
@testset "quantile_by_SimpleRandomSample" begin
## Add tests
end

@testset "quantile_by_Stratified" begin
## Add tests
end

0 comments on commit d472207

Please sign in to comment.