Skip to content

Commit

Permalink
Merge pull request #56 from iuliadmtru/design_update
Browse files Browse the repository at this point in the history
Make `svyby` work with `svytotal` again, after #51
  • Loading branch information
ayushpatnaikgit authored Sep 8, 2022
2 parents 329967f + bf21785 commit aa0715a
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 30 deletions.
4 changes: 2 additions & 2 deletions src/SurveyDesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ Survey design sampled by simple random sampling.
The population size is equal to the sample size unless `popsize` is explicitly provided.
"""
struct SimpleRandomSample <: AbstractSurveyDesign
data::DataFrame
data::AbstractDataFrame
sampsize::UInt
popsize::Union{UInt,Nothing}
sampfraction::Real
fpc::Real
ignorefpc::Bool
function SimpleRandomSample(data::DataFrame;
function SimpleRandomSample(data::AbstractDataFrame;
popsize = nothing,
sampsize = nrow(data),
weights = ones(nrow(data)), # Check the defaults
Expand Down
47 changes: 24 additions & 23 deletions src/svyby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,31 @@ julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
julia> svyby(:api00, :cname, srs, svytotal)
38×2 DataFrame
Row │ cname total
│ String15 Float64
─────┼──────────────────────────
1 │ Kern 5736.0
2 │ Los Angeles 29617.0
3 │ Orange 6744.0
4 │ San Luis Obispo 739.0
5 │ San Francisco 1675.0
6 │ Modoc 671.0
7 │ Alameda 7437.0
8 │ Solano 1869.0
⋮ │ ⋮ ⋮
32 │ Kings 939.0
33 │ Shasta 1508.0
34 │ Yolo 475.0
35 │ Calaveras 790.0
36 │ Napa 1454.0
37 │ Lake 804.0
38 │ Merced 595.0
23 rows omitted
38×3 DataFrame
Row │ cname total se_total
│ String15 Float64 Float64
─────┼────────────────────────────────────
1 │ Kern 5736.0 2045.98
2 │ Los Angeles 29617.0 2050.04
3 │ Orange 6744.0 1234.81
4 │ San Luis Obispo 739.0 NaN
5 │ San Francisco 1675.0 1193.85
6 │ Modoc 671.0 NaN
7 │ Alameda 7437.0 1633.82
8 │ Solano 1869.0 1219.59
⋮ │ ⋮ ⋮
32 │ Kings 939.0 1190.0
33 │ Shasta 1508.0 1600.0
34 │ Yolo 475.0 NaN
35 │ Calaveras 790.0 NaN
36 │ Napa 1454.0 1340.0
37 │ Lake 804.0 NaN
38 │ Merced 595.0 NaN
23 rows omitted
```
TODO: add support for `formula::AbstractVector`.
"""
function svyby(formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = [])
gdf = groupby(design.data, by)
return combine(gdf, [formula ] => ((a) -> func(a , design ,params...)) => AsTable)
end
return combine(gdf, [formula, :weights] => ((a, b) -> func(a, design, b, params...)) => AsTable)
end
2 changes: 1 addition & 1 deletion src/svydesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Survey Design:
variables: 183x45 DataFrame
id: dnum
strata: 1, 1, 1 ... 1
probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... 0.029544719150814778
probs: 0.0295, 0.0295, 0.0295 ... 0.0295
fpc:
popsize: 757, 757, 757 ... 757
sampsize: 183, 183, 183 ... 183
Expand Down
16 changes: 13 additions & 3 deletions src/svymean.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ function var_of_mean(x::Symbol, design::SimpleRandomSample)
return design.fpc / design.sampsize * var(design.data[!, x])
end

"""
    var_of_mean(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return the variance of the mean of the values in `x`, computed as
`design.fpc / design.sampsize * var(x)`. Note that `fpc` and `sampsize` are read
from `design` as a whole, not derived from the length of `x`.
"""
function var_of_mean(x::AbstractVector, design::SimpleRandomSample)
    # Design-level scaling factor: finite population correction over sample size.
    scale = design.fpc / design.sampsize
    return scale * var(x)
end
Expand All @@ -26,14 +29,21 @@ function sem(x, design::SimpleRandomSample)
return sqrt(var_of_mean(x, design))
end

"""
    sem(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return the standard error of the mean of `x`, i.e. the square root of
`var_of_mean(x, design)`.
"""
sem(x::AbstractVector, design::SimpleRandomSample) = sqrt(var_of_mean(x, design))

function svymean(x, design::SimpleRandomSample)
"""
    svymean(x::Symbol, design::SimpleRandomSample)

Return a one-row `DataFrame` with the mean of column `x` of `design.data`
(column `mean`) and its standard error as given by `sem(x, design)` (column `sem`).
"""
function svymean(x::Symbol, design::SimpleRandomSample)
    estimate = mean(design.data[!, x])
    se = sem(x, design)
    return DataFrame(; mean = estimate, sem = se)
end

function svymean(x::AbstractVector , design::SimpleRandomSample)
"""
    svymean(x::AbstractVector, design::SimpleRandomSample, _)

Inner method for `svyby`.

Return a one-row `DataFrame` with the mean of the values in `x` (column `mean`)
and its standard error as given by `sem(x, design)` (column `sem`). The third
positional argument is accepted but ignored.
"""
# TODO: results not matching for `sem`
function svymean(x::AbstractVector, design::SimpleRandomSample, _)
    estimate = mean(x)
    se = sem(x, design)
    return DataFrame(; mean = estimate, sem = se)
end
end
25 changes: 24 additions & 1 deletion src/svytotal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,36 @@ function var_of_total(x::Symbol, design::SimpleRandomSample)
return design.popsize^2 * design.fpc / design.sampsize * var(design.data[!, x])
end

"""
    var_of_total(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return the variance of the total of the values in `x`, computed as
`design.popsize^2 * design.fpc / design.sampsize * var(x)`. Note that `popsize`,
`fpc` and `sampsize` are read from `design` as a whole, not derived from the
length of `x`.
"""
function var_of_total(x::AbstractVector, design::SimpleRandomSample)
    # Design-level scaling factor, evaluated left to right as in the formula above.
    scale = design.popsize^2 * design.fpc / design.sampsize
    return scale * var(x)
end

"""
    se_tot(x::Symbol, design::SimpleRandomSample)

Return the standard error of the total for `x`, i.e. the square root of
`var_of_total(x, design)`.
"""
se_tot(x::Symbol, design::SimpleRandomSample) = sqrt(var_of_total(x, design))

"""
    se_tot(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return the standard error of the total of the values in `x`, i.e. the square
root of `var_of_total(x, design)`.
"""
se_tot(x::AbstractVector, design::SimpleRandomSample) = sqrt(var_of_total(x, design))

"""
    svytotal(x::Symbol, design::SimpleRandomSample)

Return a one-row `DataFrame` with the weighted total of column `x` of
`design.data` (column `total`), using the `weights` column of `design.data`, and
its standard error as given by `se_tot(x, design)` (column `se_total`).
"""
function svytotal(x::Symbol, design::SimpleRandomSample)
    # Weighted sum of the variable, computed once from the design's weights column.
    total = wsum(design.data[!, x], weights(design.data.weights))
    return DataFrame(total = total, se_total = se_tot(x, design))
end

"""
    svytotal(x::AbstractArray, design::SimpleRandomSample, wts)

Inner method for `svyby`.

Return a one-row `DataFrame` with the weighted total of `x` (column `total`),
using `wts` as the weights, and its standard error as given by
`se_tot(x, design)` (column `se_total`).
"""
# TODO: standard-error results not matching
function svytotal(x::AbstractArray, design::SimpleRandomSample, wts)
    weighted_total = wsum(x, weights(wts))
    se = se_tot(x, design)
    return DataFrame(; total = weighted_total, se_total = se)
end

Expand Down

0 comments on commit aa0715a

Please sign in to comment.