diff --git a/src/Survey.jl b/src/Survey.jl index 095ae659..38a4a7c0 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -14,6 +14,7 @@ using CategoricalArrays include("SurveyDesign.jl") include("mean.jl") include("quantile.jl") +include("jackknife.jl") include("total.jl") include("load_data.jl") include("hist.jl") @@ -30,5 +31,6 @@ export mean, total, quantile export plot export hist, sturges, freedman_diaconis export boxplot +export jkknife end \ No newline at end of file diff --git a/src/jackknife.jl b/src/jackknife.jl new file mode 100644 index 00000000..2d0a656d --- /dev/null +++ b/src/jackknife.jl @@ -0,0 +1,16 @@ +function jkknife(variable:: Symbol, design::OneStageClusterSample ,func:: Function; params =[]) + statistic = func(design.data[!,variable],params...) + nh = length(unique(design.data[!,design.cluster])) + newv = [] + gdf = groupby(design.data, design.cluster) + replicates = [filter(n -> n != i, 1:nh) for i in 1:nh] + for i in replicates + push!(newv,func(DataFrame(gdf[i])[!,variable])) + end + c = 0 + for i in 1:nh + c = c+(newv[i]-statistic)^2 + end + var = c*(nh-1)/nh + return DataFrame(Statistic = statistic, SE = sqrt(var)) +end \ No newline at end of file diff --git a/test/jackknife.jl b/test/jackknife.jl new file mode 100644 index 00000000..85ca3261 --- /dev/null +++ b/test/jackknife.jl @@ -0,0 +1,10 @@ +@testset "jackknife.jl" begin + apiclus1_original = load_data("apiclus1") + apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column + ############################## + # one-stage cluster sample + apiclus1 = copy(apiclus1_original) + dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc) + @test jkknife(:api00,dclus1, mean).SE[1] ≈ 26.5997 atol = 1e-4 + @test jkknife(:api00, dclus1, mean).Statistic[1] ≈ 644.1693 atol = 1e-4 +end