From 8b8d7add22a7a980739fcce1cb299976e4b7f167 Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Wed, 7 Sep 2022 18:36:59 +0300 Subject: [PATCH 1/5] Add `svytotal` method for `svyby` and change `svyby`, second attempt --- src/svyby.jl | 3 ++- src/svymean.jl | 16 +++++++++++++--- src/svytotal.jl | 25 ++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/svyby.jl b/src/svyby.jl index 4d208f60..1980a3ae 100644 --- a/src/svyby.jl +++ b/src/svyby.jl @@ -31,8 +31,9 @@ julia> svyby(:api00, :cname, srs, svytotal) 38 │ Merced 595.0 23 rows omitted ``` +TODO: functionality for `formula::AbstractVector` """ function svyby(formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = []) gdf = groupby(design.data, by) return combine(gdf, [formula ] => ((a) -> func(a , design ,params...)) => AsTable) -end \ No newline at end of file +end diff --git a/src/svymean.jl b/src/svymean.jl index 0fb1c7f3..97507d1b 100644 --- a/src/svymean.jl +++ b/src/svymean.jl @@ -18,6 +18,9 @@ function var_of_mean(x::Symbol, design::SimpleRandomSample) return design.fpc / design.sampsize * var(design.data[!, x]) end +""" +Inner method for `svyby`. +""" function var_of_mean(x::AbstractVector, design::SimpleRandomSample) return design.fpc / design.sampsize * var(x) end @@ -26,14 +29,21 @@ function sem(x, design::SimpleRandomSample) return sqrt(var_of_mean(x, design)) end +""" +Inner method for `svyby`. +""" function sem(x::AbstractVector, design::SimpleRandomSample) return sqrt(var_of_mean(x, design)) end -function svymean(x, design::SimpleRandomSample) +function svymean(x::Symbol, design::SimpleRandomSample) return DataFrame(mean = mean(design.data[!, x]), sem = sem(x, design::SimpleRandomSample)) end -function svymean(x::AbstractVector , design::SimpleRandomSample) +""" +Inner method for `svyby`. 
+""" +# TODO: results not matching for `sem` +function svymean(x::AbstractVector , design::SimpleRandomSample, _) return DataFrame(mean = mean(x), sem = sem(x, design::SimpleRandomSample)) -end \ No newline at end of file +end diff --git a/src/svytotal.jl b/src/svytotal.jl index 0dbc73f0..df6fc351 100644 --- a/src/svytotal.jl +++ b/src/svytotal.jl @@ -20,13 +20,36 @@ function var_of_total(x::Symbol, design::SimpleRandomSample) return design.popsize^2 * design.fpc / design.sampsize * var(design.data[!, x]) end +""" +Inner method for `svyby`. +""" +function var_of_total(x::AbstractVector, design::SimpleRandomSample) + return design.popsize^2 * design.fpc / design.sampsize * var(x) +end + function se_tot(x::Symbol, design::SimpleRandomSample) return sqrt(var_of_total(x, design)) end +""" +Inner method for `svyby`. +""" +function se_tot(x::AbstractVector, design::SimpleRandomSample) + return sqrt(var_of_total(x, design)) +end + function svytotal(x::Symbol, design::SimpleRandomSample) # total = design.pop_size * mean(design.data[!, variable]) - total = wsum(design.data[!, x] , weights(design.data.weights) ) + total = wsum(design.data[!, x], weights(design.data.weights)) + return DataFrame(total = total, se_total = se_tot(x, design::SimpleRandomSample)) +end + +""" +Inner method for `svyby`. 
+""" +# TODO: results not matching for `se_total` +function svytotal(x::AbstractArray, design::SimpleRandomSample, wts) + total = wsum(x, weights(wts)) return DataFrame(total = total , se_total = se_tot(x, design::SimpleRandomSample)) end From 5187c5953e4222af136d31830ff6ebdaa6051e71 Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Wed, 7 Sep 2022 18:41:41 +0300 Subject: [PATCH 2/5] Actually change `svyby` --- src/svyby.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/svyby.jl b/src/svyby.jl index 1980a3ae..eefc314f 100644 --- a/src/svyby.jl +++ b/src/svyby.jl @@ -35,5 +35,5 @@ TODO: functionality for `formula::AbstractVector` """ function svyby(formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = []) gdf = groupby(design.data, by) - return combine(gdf, [formula ] => ((a) -> func(a , design ,params...)) => AsTable) + return combine(gdf, [formula, :weights] => ((a, b) -> func(a, design, b, params...)) => AsTable) end From df38cb3dc26d0673bf5b16fc665eb8823fff2d13 Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Wed, 7 Sep 2022 18:44:29 +0300 Subject: [PATCH 3/5] Change `data` field from `DataFrame` to `AbstractDataFrame` --- src/SurveyDesign.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl index 7f5916a1..4b15d2e1 100644 --- a/src/SurveyDesign.jl +++ b/src/SurveyDesign.jl @@ -29,13 +29,13 @@ Survey design sampled by simple random sampling. The population size is equal to the sample size unless `popsize` is explicitly provided. 
""" struct SimpleRandomSample <: AbstractSurveyDesign - data::DataFrame + data::AbstractDataFrame sampsize::UInt popsize::Union{UInt,Nothing} sampfraction::Real fpc::Real ignorefpc::Bool - function SimpleRandomSample(data::DataFrame; + function SimpleRandomSample(data::AbstractDataFrame; popsize = nothing, sampsize = nrow(data), weights = ones(nrow(data)), # Check the defaults From 70c858407a7b66f417c9ab86d14b4c0fd26d63ee Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Wed, 7 Sep 2022 18:47:02 +0300 Subject: [PATCH 4/5] Fix docstring for `svyby` --- src/svyby.jl | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/svyby.jl b/src/svyby.jl index eefc314f..90ade71d 100644 --- a/src/svyby.jl +++ b/src/svyby.jl @@ -9,27 +9,27 @@ julia> apisrs = load_data("apisrs"); julia> srs = SimpleRandomSample(apisrs); julia> svyby(:api00, :cname, srs, svytotal) -38×2 DataFrame - Row │ cname total - │ String15 Float64 -─────┼────────────────────────── - 1 │ Kern 5736.0 - 2 │ Los Angeles 29617.0 - 3 │ Orange 6744.0 - 4 │ San Luis Obispo 739.0 - 5 │ San Francisco 1675.0 - 6 │ Modoc 671.0 - 7 │ Alameda 7437.0 - 8 │ Solano 1869.0 - ⋮ │ ⋮ ⋮ - 32 │ Kings 939.0 - 33 │ Shasta 1508.0 - 34 │ Yolo 475.0 - 35 │ Calaveras 790.0 - 36 │ Napa 1454.0 - 37 │ Lake 804.0 - 38 │ Merced 595.0 - 23 rows omitted +38×3 DataFrame + Row │ cname total se_total + │ String15 Float64 Float64 +─────┼──────────────────────────────────── + 1 │ Kern 5736.0 2045.98 + 2 │ Los Angeles 29617.0 2050.04 + 3 │ Orange 6744.0 1234.81 + 4 │ San Luis Obispo 739.0 NaN + 5 │ San Francisco 1675.0 1193.85 + 6 │ Modoc 671.0 NaN + 7 │ Alameda 7437.0 1633.82 + 8 │ Solano 1869.0 1219.59 + ⋮ │ ⋮ ⋮ ⋮ + 32 │ Kings 939.0 1190.0 + 33 │ Shasta 1508.0 1600.0 + 34 │ Yolo 475.0 NaN + 35 │ Calaveras 790.0 NaN + 36 │ Napa 1454.0 1340.0 + 37 │ Lake 804.0 NaN + 38 │ Merced 595.0 NaN + 23 rows omitted ``` TODO: functionality for `formula::AbstractVector` """ From 
bf2178530290c48ac8069f9185c0e71d024dc44a Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Wed, 7 Sep 2022 19:00:16 +0300 Subject: [PATCH 5/5] Fix docstring for `svydesign` --- src/svydesign.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/svydesign.jl b/src/svydesign.jl index 161fef32..dd0e6d93 100644 --- a/src/svydesign.jl +++ b/src/svydesign.jl @@ -18,7 +18,7 @@ Survey Design: variables: 183x45 DataFrame id: dnum strata: 1, 1, 1 ... 1 -probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... 0.029544719150814778 +probs: 0.0295, 0.0295, 0.0295 ... 0.0295 fpc: popsize: 757, 757, 757 ... 757 sampsize: 183, 183, 183 ... 183