From bfe0cdcec2ac2f274d8fee9fd65d39b4df08b2f6 Mon Sep 17 00:00:00 2001
From: smishr
Date: Fri, 2 Sep 2022 18:28:42 +0530
Subject: [PATCH] Add svyby basic functionality for svymean

---
 src/Survey.jl  |  2 ++
 src/svyby.jl   | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/svymean.jl | 23 +++++++++++++++++++++++
 3 files changed, 77 insertions(+)
 create mode 100644 src/svyby.jl

diff --git a/src/Survey.jl b/src/Survey.jl
index b90265eb..5f0b2a7d 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -20,11 +20,13 @@ include("svyhist.jl")
 include("svyplot.jl")
 include("dimnames.jl")
 include("svyboxplot.jl")
+include("svyby.jl")
 
 export load_data
 export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample
 export svydesign
 export svyglm
+export svyby
 export dim, colnames, dimnames
 export svymean, svytotal, svyquantile
 export @formula
diff --git a/src/svyby.jl b/src/svyby.jl
new file mode 100644
index 00000000..4d208f60
--- /dev/null
+++ b/src/svyby.jl
@@ -0,0 +1,52 @@
+"""
+    svyby(formula::Symbol, by::Symbol, design, func::Function, params = [])
+
+Apply the estimator `func` to the column `formula` of `design.data` separately within
+each group defined by the column `by`, and combine the results into one `DataFrame`.
+
+# Arguments
+- `formula`: name of the column of `design.data` passed to `func`.
+- `by`: name of the column of `design.data` used for grouping.
+- `design`: survey design; its `data` is grouped and it is forwarded to `func`.
+- `func`: called once per group as `func(values, design, params...)`, where `values`
+  is that group's part of the `formula` column.
+- `params`: extra positional arguments splatted into every call of `func`.
+
+# Examples
+```jldoctest
+julia> using Survey
+
+julia> apisrs = load_data("apisrs");
+
+julia> srs = SimpleRandomSample(apisrs);
+
+julia> svyby(:api00, :cname, srs, svytotal)
+38×2 DataFrame
+ Row │ cname            total
+     │ String15         Float64
+─────┼──────────────────────────
+   1 │ Kern              5736.0
+   2 │ Los Angeles      29617.0
+   3 │ Orange            6744.0
+   4 │ San Luis Obispo    739.0
+   5 │ San Francisco     1675.0
+   6 │ Modoc              671.0
+   7 │ Alameda           7437.0
+   8 │ Solano            1869.0
+  ⋮  │        ⋮            ⋮
+  32 │ Kings              939.0
+  33 │ Shasta            1508.0
+  34 │ Yolo               475.0
+  35 │ Calaveras          790.0
+  36 │ Napa              1454.0
+  37 │ Lake               804.0
+  38 │ Merced             595.0
+                 23 rows omitted
+```
+"""
+function svyby(
+    formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = []
+)
+    gdf = groupby(design.data, by)
+    return combine(gdf, [formula] => (col -> func(col, design, params...)) => AsTable)
+end
diff --git a/src/svymean.jl b/src/svymean.jl
index d6141d19..0fb1c7f3 100644
--- a/src/svymean.jl
+++ b/src/svymean.jl
@@ -18,10 +18,33 @@ function var_of_mean(x::Symbol, design::SimpleRandomSample)
     return design.fpc / design.sampsize * var(design.data[!, x])
 end
 
+# Vector method: lets `svyby` pass a column slice instead of a column name.
+# NOTE(review): `fpc` and `sampsize` are those of the whole design even when `x` is a
+# per-group slice -- confirm this is the intended variance for subgroup estimates.
+function var_of_mean(x::AbstractVector, design::SimpleRandomSample)
+    return design.fpc / design.sampsize * var(x)
+end
+
 function sem(x, design::SimpleRandomSample)
     return sqrt(var_of_mean(x, design))
 end
 
+# Vector method: standard error of the mean of the values in `x`.
+function sem(x::AbstractVector, design::SimpleRandomSample)
+    return sqrt(var_of_mean(x, design))
+end
+
 function svymean(x, design::SimpleRandomSample)
     return DataFrame(mean = mean(design.data[!, x]), sem = sem(x, design::SimpleRandomSample))
 end
+
+"""
+    svymean(x::AbstractVector, design::SimpleRandomSample)
+
+Return a one-row `DataFrame` with the `mean` of the values in `x` and its standard error
+`sem`. This method takes the values directly, so it can be used as the `func` argument
+of `svyby`.
+"""
+function svymean(x::AbstractVector, design::SimpleRandomSample)
+    return DataFrame(mean = mean(x), sem = sem(x, design))
+end