From 8bb8bfd9db924394471570b99d331b6e77a291a5 Mon Sep 17 00:00:00 2001 From: ayushpatnaikgit Date: Thu, 19 Jan 2023 14:55:59 +0530 Subject: [PATCH] Remove jackknife and using bootstrap in ratio estimation. --- src/Survey.jl | 1 - src/jackknife.jl | 16 ---------------- src/ratio.jl | 33 +++++++++++---------------------- test/jackknife.jl | 8 -------- test/ratio.jl | 6 +++--- 5 files changed, 14 insertions(+), 50 deletions(-) delete mode 100644 src/jackknife.jl delete mode 100644 test/jackknife.jl diff --git a/src/Survey.jl b/src/Survey.jl index f25e33a7..66de8042 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -17,7 +17,6 @@ include("SurveyDesign.jl") include("bootstrap.jl") include("mean.jl") include("quantile.jl") -include("jackknife.jl") include("total.jl") include("load_data.jl") include("hist.jl") diff --git a/src/jackknife.jl b/src/jackknife.jl deleted file mode 100644 index 55880df9..00000000 --- a/src/jackknife.jl +++ /dev/null @@ -1,16 +0,0 @@ -function jkknife(variable:: Symbol, design::SurveyDesign ,func:: Function; params =[]) - statistic = func(design.data[!,variable],params...) - nh = length(unique(design.data[!,design.cluster])) - newv = [] - gdf = groupby(design.data, design.cluster) - replicates = [filter(n -> n != i, 1:nh) for i in 1:nh] - for i in replicates - push!(newv,func(DataFrame(gdf[i])[!,variable])) - end - c = 0 - for i in 1:nh - c = c+(newv[i]-statistic)^2 - end - var = c*(nh-1)/nh - return DataFrame(Statistic = statistic, SE = sqrt(var)) -end diff --git a/src/ratio.jl b/src/ratio.jl index ebfef889..8a0226f3 100644 --- a/src/ratio.jl +++ b/src/ratio.jl @@ -6,30 +6,19 @@ Estimate the ratio of the columns specified in numerator and denominator ```jldoctest julia> apiclus1 = load_data("apiclus1"); -julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); +julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights; -julia> ratio(:api00, :enroll, clus_one_stage) +ratio(:api00, :enroll, clus_one_stage) 1×2 DataFrame - Row │ Statistic SE - │ Float64 Float64 -─────┼───────────────────── - 1 │ 1.17182 0.151242 + Row │ ratio SE + │ Float64 Float64 +─────┼─────────────────── + 1 │ 1.17182 0.130834 ``` """ -function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign) - statistic = wsum(design.data[!,variable_num],design.data[!,design.weights])/wsum(design.data[!,variable_den],design.data[!,design.weights]) - nh = length(unique(design.data[!,design.cluster])) - newv = [] - gdf = groupby(design.data, design.cluster) - replicates = [filter(n -> n != i, 1:nh) for i in 1:nh] - for i in replicates - df = DataFrame(gdf[i]) - push!(newv, wsum(df[!,variable_num],df[!,design.weights])/wsum(df[!,variable_den],df[!,design.weights])) - end - c = 0 - for i in 1:nh - c = c+(newv[i]-statistic)^2 - end - var = c*(nh-1)/nh - return DataFrame(Statistic = statistic, SE = sqrt(var)) +function ratio(variable_num::Symbol, variable_den::Symbol, design::ReplicateDesign) + X = wsum(design.data[!, variable_num], design.data[!, design.weights]) / wsum(design.data[!, variable_den], design.data[!, design.weights]) + Xt = [(wsum(design.data[!, variable_num], weights(design.data[! , "replicate_"*string(i)]))) / (wsum(design.data[!, variable_den], weights(design.data[! , "replicate_"*string(i)]))) for i in 1:design.replicates] + variance = sum((Xt .- X).^2) / design.replicates + DataFrame(ratio = X, SE = sqrt(variance)) end diff --git a/test/jackknife.jl b/test/jackknife.jl deleted file mode 100644 index 25e35e91..00000000 --- a/test/jackknife.jl +++ /dev/null @@ -1,8 +0,0 @@ -@testset "jackknife.jl" begin - apiclus1_original = load_data("apiclus1") - apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column - apiclus1 = copy(apiclus1_original) - dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights=:pw); - @test jkknife(:api00, dclus1, mean).SE[1] ≈ 26.5997 atol = 1e-4 - @test jkknife(:api00, dclus1, mean).Statistic[1] ≈ 644.1693 atol = 1e-4 -end diff --git a/test/ratio.jl b/test/ratio.jl index b8652ef1..9185952c 100644 --- a/test/ratio.jl +++ b/test/ratio.jl @@ -4,7 +4,7 @@ ############################## # one-stage cluster sample apiclus1 = copy(apiclus1_original) - dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize = :fpc) - @test ratio(:api00, :enroll, dclus1).SE[1] ≈ 0.151242 atol = 1e-4 - @test ratio(:api00, :enroll, dclus1).Statistic[1] ≈ 1.17182 atol = 1e-4 + dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize = :fpc) |> bootweights + @test ratio(:api00, :enroll, dclus1).SE[1] ≈ 0.1275446 atol = 1e-1 + @test ratio(:api00, :enroll, dclus1).ratio[1] ≈ 1.17182 atol = 1e-4 end \ No newline at end of file