From 7467d9a59f7fed2a3efc30e86fc9ff53268350a4 Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Tue, 6 Dec 2022 16:45:50 +0530 Subject: [PATCH 1/7] Edit mean references --- src/mean.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mean.jl b/src/mean.jl index f24ace7b..95869857 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -95,8 +95,8 @@ end Estimate the subpopulation mean of a variable `x`. -The calculations were done according to the book [Calibration Estimators in Survey Sampling](https://www.tandfonline.com/doi/abs/10.1080/01621459.1992.10475217) -by Jean-Claude Deville and Carl-Erik Sarndal. +The calculations were done according to the book [Model-Assisted Survey Sampling](https://link.springer.com/book/9780387406206) +by Carl-Erik Sarndal, Bengt Swensson, Jan Wretman, section 3.3 and Chap 10. Assumes popsize is known and subpopulation size is unknown. ```jldoctest julia> using Survey; From 5d665f982e419c560d313689b78fc05a4c5bab1f Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Tue, 6 Dec 2022 16:46:11 +0530 Subject: [PATCH 2/7] updated quantile --- src/quantile.jl | 57 +++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/src/quantile.jl b/src/quantile.jl index 354e95b4..d81e2a6e 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -1,6 +1,14 @@ """ - quantile(var, design, q) -Estimate quantiles for `SurveyDesign`s. + quantile(var, design, q; kwargs...) +Estimate quantiles for a complex survey. + +Hyndman and Fan compiled a taxonomy of nine algorithms to estimate quantiles. These are implemented in Statistics.quantile, which this function calls. +The Julia, R and Python-numpy use the same defaults + +# References: +Hyndman, R.J and Fan, Y. (1996) "Sample Quantiles in Statistical Packages", The American Statistician, Vol. 50, No. 4, pp. 
361-365 +[Quantiles](https://en.m.wikipedia.org/wiki/Quantile) on wikipedia +Section 2.4.1 and Appendix C.4 - [Complex Surveys: a guide to analysis using R](https://r-survey.r-forge.r-project.org/svybook/) ```jldoctest julia> apisrs = load_data("apisrs"); @@ -13,36 +21,33 @@ julia> quantile(:enroll, srs, 0.5) │ Float64 ─────┼────────────────── 1 │ 453.0 + +julia> quantile(:enroll, srs, [0.25,0.75, 0.99]) +3×1 DataFrame + Row │ [0.25, 0.75, 0.99]th percentile + │ Float64 +─────┼───────────────────────────────── + 1 │ 339.0 + 2 │ 668.5 + 3 │ 1911.39 + +julia> strat = load_data("apistrat"); + +julia> dstrat = StratifiedSample(strat, :stype; popsize=:fpc); + +julia> quantile(:enroll, dstrat, [0.1,0.2,0.5,0.75,0.95]) + ``` """ -function quantile(var, design::SimpleRandomSample, q; kwargs...) +function quantile(var::Symbol, design::SimpleRandomSample, q::Union{<:Real,Vector{<:Real}}; alpha::Real=1.0, beta::Real=alpha, kwargs...) x = design.data[!, var] - df = DataFrame(tmp = Statistics.quantile(Float32.(x), q; kwargs...)) - rename!(df, :tmp => Symbol(string(q) .* "th percentile")) + df = DataFrame(qth_quantile = q, quantile = Statistics.quantile(Float32.(x), q; kwargs...)) return df end -function quantile(var, design::StratifiedSample, q) +function quantile(var::Symbol, design::StratifiedSample, q::Union{<:Real,Vector{<:Real}}; kwargs...) 
x = design.data[!, var] w = design.data.probs - df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q)) - rename!(df, :tmp => Symbol(string(q) .* "th percentile")) - return df -end - -function quantile(var, design::design, q) - x = design.variables[!, var] - w = design.variables.probs - df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q)) - rename!(df, :tmp => Symbol(string(q) .* "th percentile")) - - return df -end - -# Inner method for `by` -function quantile(x, w, _, q) - df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q)) - rename!(df, :tmp => Symbol(string(q) .* "th percentile")) - + df = DataFrame(qth_quantile = q, quantile = Statistics.quantile(Float32.(x), weights(w), q)) return df -end +end \ No newline at end of file From 73cfd66fd26c74715f8fb2fbc96df825a68bbf38 Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Tue, 6 Dec 2022 16:46:26 +0530 Subject: [PATCH 3/7] change quantile tests --- src/Survey.jl | 1 + test/quantile.jl | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Survey.jl b/src/Survey.jl index 5150ca45..9cec5118 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -2,6 +2,7 @@ module Survey using DataFrames using Statistics +import Statistics: quantile using StatsBase import StatsBase: mean,quantile using CSV diff --git a/test/quantile.jl b/test/quantile.jl index 51c79007..a233ebef 100644 --- a/test/quantile.jl +++ b/test/quantile.jl @@ -1,17 +1,25 @@ -@testset "quantile.jl" begin - # SimpleRandomSample - apisrs = load_data("apisrs") +@testset "quantile_SimpleRandomSample" begin + ##### SimpleRandomSample tests + # Load API datasets + apisrs_original = load_data("apisrs") + apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw + apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1)) + ############################## + ### weights or probs as Symbol + apisrs = copy(apisrs_original) + 
srs_design = SimpleRandomSample(apisrs,popsize=:fpc) + - srs_new = SimpleRandomSample(apisrs,popsize=:fpc,ignorefpc = true) - srs_old = design(id = :1, data = apisrs) + + # srs_old = design(id = :1, data = apisrs) # 0.5th percentile q_05_new = quantile(:api00, srs_new, 0.5) - q_05_old = quantile(:api00, srs_old, 0.5) + # q_05_old = quantile(:api00, srs_old, 0.5) @test q_05_new == q_05_old # 0.25th percentile q_025_new = quantile(:api00, srs_new, 0.25) - q_025_old = quantile(:api00, srs_old, 0.25) - @test q_025_new == q_025_old + # q_025_old = quantile(:api00, srs_old, 0.25) + # @test q_025_new == q_025_old # StratifiedSample end From 346e44426d39c563ea1688ce8ffe226b0b5fe4f6 Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Tue, 6 Dec 2022 16:51:02 +0530 Subject: [PATCH 4/7] quantile tests updated template --- test/quantile.jl | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/test/quantile.jl b/test/quantile.jl index a233ebef..5a9a711d 100644 --- a/test/quantile.jl +++ b/test/quantile.jl @@ -8,18 +8,16 @@ ### weights or probs as Symbol apisrs = copy(apisrs_original) srs_design = SimpleRandomSample(apisrs,popsize=:fpc) - - +end - # srs_old = design(id = :1, data = apisrs) - # 0.5th percentile - q_05_new = quantile(:api00, srs_new, 0.5) - # q_05_old = quantile(:api00, srs_old, 0.5) - @test q_05_new == q_05_old - # 0.25th percentile - q_025_new = quantile(:api00, srs_new, 0.25) - # q_025_old = quantile(:api00, srs_old, 0.25) - # @test q_025_new == q_025_old +@testset "quantile_Stratified" begin + ## Add tests +end - # StratifiedSample +@testset "quantile_by_SimpleRandomSample" begin + ## Add tests end + +@testset "quantile_by_Stratified" begin + ## Add tests +end \ No newline at end of file From 0d72347400caab7d23b6f0f0b95de2a6bc9b8451 Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Wed, 7 Dec 2022 16:15:08 +0530 Subject: [PATCH 5/7] Fix quantile for SRS 
--- src/quantile.jl | 63 +++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/src/quantile.jl b/src/quantile.jl index d81e2a6e..3e924109 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -1,53 +1,50 @@ """ - quantile(var, design, q; kwargs...) + quantile(var, design, p; kwargs...) Estimate quantiles for a complex survey. Hyndman and Fan compiled a taxonomy of nine algorithms to estimate quantiles. These are implemented in Statistics.quantile, which this function calls. The Julia, R and Python-numpy use the same defaults # References: -Hyndman, R.J and Fan, Y. (1996) "Sample Quantiles in Statistical Packages", The American Statistician, Vol. 50, No. 4, pp. 361-365 -[Quantiles](https://en.m.wikipedia.org/wiki/Quantile) on wikipedia -Section 2.4.1 and Appendix C.4 - [Complex Surveys: a guide to analysis using R](https://r-survey.r-forge.r-project.org/svybook/) +- Hyndman, R.J and Fan, Y. (1996) ["Sample Quantiles in Statistical Packages"](https://www.amherst.edu/media/view/129116/original/Sample+Quantiles.pdf), The American Statistician, Vol. 50, No. 4, pp. 361-365. +- [Quantiles](https://en.m.wikipedia.org/wiki/Quantile) on wikipedia +- [Complex Surveys: a guide to analysis using R](https://r-survey.r-forge.r-project.org/svybook/), Section 2.4.1 and Appendix C.4. 
```jldoctest julia> apisrs = load_data("apisrs"); julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); -julia> quantile(:enroll, srs, 0.5) -1×1 DataFrame - Row │ 0.5th percentile - │ Float64 -─────┼────────────────── - 1 │ 453.0 - -julia> quantile(:enroll, srs, [0.25,0.75, 0.99]) -3×1 DataFrame - Row │ [0.25, 0.75, 0.99]th percentile - │ Float64 -─────┼───────────────────────────────── - 1 │ 339.0 - 2 │ 668.5 - 3 │ 1911.39 - -julia> strat = load_data("apistrat"); - -julia> dstrat = StratifiedSample(strat, :stype; popsize=:fpc); - -julia> quantile(:enroll, dstrat, [0.1,0.2,0.5,0.75,0.95]) - +julia> quantile(:api00,srs,0.5) +1×2 DataFrame + Row │ probability quantile + │ Float64 Float64 +─────┼─────────────────────── + 1 │ 0.5 659.0 + +julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95]) +5×2 DataFrame + Row │ probability quantile + │ Float64 Float64 +─────┼─────────────────────── + 1 │ 0.1 245.5 + 2 │ 0.2 317.6 + 3 │ 0.5 453.0 + 4 │ 0.75 668.5 + 5 │ 0.95 1473.1 ``` """ -function quantile(var::Symbol, design::SimpleRandomSample, q::Union{<:Real,Vector{<:Real}}; alpha::Real=1.0, beta::Real=alpha, kwargs...) - x = design.data[!, var] - df = DataFrame(qth_quantile = q, quantile = Statistics.quantile(Float32.(x), q; kwargs...)) +function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vector{<:Real}}; ci::Bool=false, se::Bool=false, kwargs...) + v = design.data[!, var] + probs = design.data[!, :probs] + df = DataFrame(probability = p, quantile = Statistics.quantile(v, ProbabilityWeights(probs),p)) + # TODO: Add CI and SE of the quantile return df end -function quantile(var::Symbol, design::StratifiedSample, q::Union{<:Real,Vector{<:Real}}; kwargs...) - x = design.data[!, var] - w = design.data.probs - df = DataFrame(qth_quantile = q, quantile = Statistics.quantile(Float32.(x), weights(w), q)) +function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}}; kwargs...) 
+ v = design.data[!, var] + probs = design.data[!, :probs] + df = DataFrame(qth_quantile = p, quantile = Statistics.quantile(v, ProbabilityWeights(probs), p)) return df end \ No newline at end of file From cb95f950cc482fa0455ddf0c38b7ef281bf02c0f Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Wed, 7 Dec 2022 16:21:29 +0530 Subject: [PATCH 6/7] Add test quantile SRS --- src/quantile.jl | 4 ++-- test/quantile.jl | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/quantile.jl b/src/quantile.jl index 3e924109..7953af98 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -42,9 +42,9 @@ function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vecto return df end -function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}}; kwargs...) +function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}}; ci::Bool=false, se::Bool=false, kwargs...) v = design.data[!, var] probs = design.data[!, :probs] - df = DataFrame(qth_quantile = p, quantile = Statistics.quantile(v, ProbabilityWeights(probs), p)) + df = DataFrame(probability = p, quantile = Statistics.quantile(v, ProbabilityWeights(probs), p)) return df end \ No newline at end of file diff --git a/test/quantile.jl b/test/quantile.jl index 5a9a711d..e4517251 100644 --- a/test/quantile.jl +++ b/test/quantile.jl @@ -8,6 +8,8 @@ ### weights or probs as Symbol apisrs = copy(apisrs_original) srs_design = SimpleRandomSample(apisrs,popsize=:fpc) + @test quantile(:api00,srs_design,0.5)[!,2] ≈ 659 atol = 1e-4 + @test quantile(:enroll,srs_design,[0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [245.5,317.6,453.0,668.5,1473.1] atol = 1e-4 end @testset "quantile_Stratified" begin From f8ab7dc0efd81ce7b0f8df5042a1d84d7f4dbbd7 Mon Sep 17 00:00:00 2001 From: smishr <43640926+smishr@users.noreply.github.com> Date: Thu, 8 Dec 2022 17:39:09 +0530 Subject: [PATCH 7/7] Fix SRS quantile tests --- src/quantile.jl | 11 +++++++---- 
test/quantile.jl | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/quantile.jl b/src/quantile.jl index 7953af98..8ee6000c 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -34,17 +34,20 @@ julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95]) 5 │ 0.95 1473.1 ``` """ -function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vector{<:Real}}; ci::Bool=false, se::Bool=false, kwargs...) +function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vector{<:Real}}; + alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...) v = design.data[!, var] probs = design.data[!, :probs] - df = DataFrame(probability = p, quantile = Statistics.quantile(v, ProbabilityWeights(probs),p)) + df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p)) # TODO: Add CI and SE of the quantile return df end -function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}}; ci::Bool=false, se::Bool=false, kwargs...) +function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}}; + alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...) 
v = design.data[!, var] probs = design.data[!, :probs] - df = DataFrame(probability = p, quantile = Statistics.quantile(v, ProbabilityWeights(probs), p)) + df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p)) # Not sure which quantile definition this returns + # TODO: Add CI and SE of the quantile return df end \ No newline at end of file diff --git a/test/quantile.jl b/test/quantile.jl index e4517251..cab18fdb 100644 --- a/test/quantile.jl +++ b/test/quantile.jl @@ -5,15 +5,24 @@ apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1)) ############################## - ### weights or probs as Symbol apisrs = copy(apisrs_original) srs_design = SimpleRandomSample(apisrs,popsize=:fpc) - @test quantile(:api00,srs_design,0.5)[!,2] ≈ 659 atol = 1e-4 + @test quantile(:api00,srs_design,0.5)[!,2][1] ≈ 659.0 atol=1e-4 + @test quantile(:api00,srs_design,[0.1753,0.25,0.5,0.75,0.975])[!,2] ≈ [512.8847,544,659,752.5,905] atol = 1e-4 @test quantile(:enroll,srs_design,[0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [245.5,317.6,453.0,668.5,1473.1] atol = 1e-4 end @testset "quantile_Stratified" begin - ## Add tests + ##### StratifiedSample tests + # Load API datasets + apistrat_original = load_data("apistrat") + apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw + apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw + # base functionality + apistrat = copy(apistrat_original) + dstrat = StratifiedSample(apistrat, :stype; popsize = :fpc) + # Check which definition of quantile for StratifiedSample + # @test quantile(:enroll,dstrat,[0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [262,309.3366,446.4103,658.8764,1589.7881] atol = 1e-4 end @testset "quantile_by_SimpleRandomSample" begin