Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bootstrap estimation for cluster sampling #151

Merged
merged 2 commits into from
Dec 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

Expand Down
2 changes: 2 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Survey = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Survey
using Documenter
using Random

# Register `using Survey` as the setup code for doctests in Survey's
# docstrings; `recursive=true` applies the same setup to Survey's submodules.
DocMeta.setdocmeta!(Survey, :DocTestSetup, :(using Survey); recursive=true)

Expand Down
3 changes: 3 additions & 0 deletions src/Survey.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ using LinearAlgebra
using CairoMakie
using AlgebraOfGraphics
using CategoricalArrays
using Random

include("SurveyDesign.jl")
include("mean.jl")
Expand All @@ -22,6 +23,7 @@ include("plot.jl")
include("dimnames.jl")
include("boxplot.jl")
include("show.jl")
include("bootstrap.jl")

export load_data
export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample
Expand All @@ -31,6 +33,7 @@ export mean, total, quantile
export plot
export hist, sturges, freedman_diaconis
export boxplot
export bootstrap
export jkknife

end
36 changes: 36 additions & 0 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
```jldoctest
julia> using Survey, Random, StatsBase;

julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc);

julia> rng = MersenneTwister(111);

julia> func = wsum;

julia> bootstrap(:api00, dclus1, func; replicates=1000, rng)
1×2 DataFrame
Row │ statistic SE
│ Float64 Float64
─────┼──────────────────────
1 │ 5.94916e6 1.36593e6

```
"""
function bootstrap(x::Symbol, design::OneStageClusterSample, func = wsum; replicates = 100, rng = MersenneTwister(1234))
gdf = groupby(design.data, design.cluster)
psus = unique(design.data[!, design.cluster])
nh = length(psus)
X = func(design.data[:, x], design.data.weights)
Xt = Array{Float64, 1}(undef, replicates)
for i in 1:replicates
selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
Xt[i] = func(xhij, whij)
end
variance = sum((Xt .- X).^2) / replicates
return DataFrame(statistic = X, SE = sqrt(variance))
end
2 changes: 2 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[deps]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
10 changes: 10 additions & 0 deletions test/bootstrap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using Random, StatsBase
# Regression test for `bootstrap` on a one-stage cluster sample of `apiclus1`.
# The fixture is built inside the testset so that an exception raised during
# setup is recorded by this testset, and so that the fixture names do not leak
# as globals into other test files.
@testset "bootstrap.jl" begin
    apiclus1 = load_data("apiclus1")
    dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
    # Fixed seed so the resampled estimates are reproducible.
    rng = MersenneTwister(111)
    func = wsum
    est = bootstrap(:api00, dclus1, func; replicates=1000, rng)
    @test est.SE[1] ≈ 1.365925776009e6
    @test est.statistic[1] ≈ 5.9491620666e6
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ include("mean.jl")
include("dimnames.jl")
include("plot.jl")
include("hist.jl")
include("boxplot.jl")
include("boxplot.jl")
include("bootstrap.jl")