Skip to content

Commit

Permalink
Merge branch 'design_update' of https://github.com/smishr/Survey.jl i…
Browse files Browse the repository at this point in the history
…nto design_update
  • Loading branch information
smishr committed Sep 9, 2022
2 parents e4c5216 + a7f5df5 commit f35be8a
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 37 deletions.
26 changes: 14 additions & 12 deletions src/SurveyDesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,23 @@ Survey design sampled by simple random sampling.
The population size is equal to the sample size unless `popsize` is explicitly provided.
"""
struct SimpleRandomSample <: AbstractSurveyDesign
data::DataFrame
sampsize::Union{Nothing,Unsigned}
popsize::Union{Nothing,Unsigned}
data::AbstractDataFrame
sampsize::UInt
popsize::Union{UInt,Nothing}
sampfraction::Real
fpc::Real
ignorefpc::Bool
function SimpleRandomSample(data::DataFrame;
popsize=nothing,
sampsize=nrow(data),
weights=nothing, # Check the defaults
probs=nothing,
ignorefpc=false
)
# If popsize is a column in data, vectorise sampsize and calc the sampling weights, elseif
# set population size as sum of weights if it is not given;
function SimpleRandomSample(data::AbstractDataFrame;
popsize = nothing,
sampsize = nrow(data),
weights = ones(nrow(data)), # Check the defaults
probs = nothing,
ignorefpc = true
)
if isa(weights, Symbol)
weights = data[!, weights]
end
# set population size if it is not given; `weights` and `sampsize` must be given
if isnothing(popsize)
if typeof(weights) <: Vector{<:Real}
if !all(y -> y == first(weights), weights) # SRS by definition is equi-weighted
Expand Down
47 changes: 24 additions & 23 deletions src/svyby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,31 @@ julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
julia> svyby(:api00, :cname, srs, svytotal)
38×2 DataFrame
Row │ cname total
│ String15 Float64
─────┼──────────────────────────
1 │ Kern 5736.0
2 │ Los Angeles 29617.0
3 │ Orange 6744.0
4 │ San Luis Obispo 739.0
5 │ San Francisco 1675.0
6 │ Modoc 671.0
7 │ Alameda 7437.0
8 │ Solano 1869.0
⋮ │ ⋮ ⋮
32 │ Kings 939.0
33 │ Shasta 1508.0
34 │ Yolo 475.0
35 │ Calaveras 790.0
36 │ Napa 1454.0
37 │ Lake 804.0
38 │ Merced 595.0
23 rows omitted
38×3 DataFrame
Row │ cname total se_total
│ String15 Float64 Float64
─────┼────────────────────────────────────
1 │ Kern 5736.0 2045.98
2 │ Los Angeles 29617.0 2050.04
3 │ Orange 6744.0 1234.81
4 │ San Luis Obispo 739.0 NaN
5 │ San Francisco 1675.0 1193.85
6 │ Modoc 671.0 NaN
7 │ Alameda 7437.0 1633.82
8 │ Solano 1869.0 1219.59
⋮ │ ⋮ ⋮
32 │ Kings 939.0 1190.0
33 │ Shasta 1508.0 1600.0
34 │ Yolo 475.0 NaN
35 │ Calaveras 790.0 NaN
36 │ Napa 1454.0 1340.0
37 │ Lake 804.0 NaN
38 │ Merced 595.0 NaN
23 rows omitted
```
TODO: add support for passing several variables at once via `formula::AbstractVector`.
"""
function svyby(formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = [])
gdf = groupby(design.data, by)
return combine(gdf, [formula ] => ((a) -> func(a , design ,params...)) => AsTable)
end
return combine(gdf, [formula, :weights] => ((a, b) -> func(a, design, b, params...)) => AsTable)
end
2 changes: 1 addition & 1 deletion src/svydesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Survey Design:
variables: 183x45 DataFrame
id: dnum
strata: 1, 1, 1 ... 1
probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... 0.029544719150814778
probs: 0.0295, 0.0295, 0.0295 ... 0.0295
fpc:
popsize: 757, 757, 757 ... 757
sampsize: 183, 183, 183 ... 183
Expand Down
12 changes: 11 additions & 1 deletion src/svymean.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ function var_of_mean(x::Symbol, design::SimpleRandomSample)
return design.fpc ./ design.sampsize .* var(design.data[!, x])
end

"""
    var_of_mean(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return `var(x)` scaled by `design.fpc ./ design.sampsize`, where `x` is supplied
directly as a vector of values.
"""
function var_of_mean(x::AbstractVector, design::SimpleRandomSample)
    # Same left-to-right evaluation as `fpc ./ sampsize .* var(x)`.
    scale = design.fpc ./ design.sampsize
    return scale .* var(x)
end
Expand All @@ -26,6 +29,9 @@ function sem(x, design::SimpleRandomSample)
return sqrt(var_of_mean(x, design))
end

"""
    sem(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return the square root of `var_of_mean(x, design)` for a vector of values `x`.
"""
sem(x::AbstractVector, design::SimpleRandomSample) = sqrt(var_of_mean(x, design))
Expand All @@ -44,7 +50,11 @@ function svymean(x, design::SimpleRandomSample)
return DataFrame(mean = mean(design.data[!, x]), sem = sem(x, design::SimpleRandomSample))
end

function svymean(x::AbstractVector , design::SimpleRandomSample)
"""
    svymean(x::AbstractVector, design::SimpleRandomSample, _)

Inner method for `svyby`.

Return a `DataFrame` with a `mean` column holding `mean(x)` and a `sem` column
holding `sem(x, design)`. The third positional argument is accepted and ignored.
"""
function svymean(x::AbstractVector, design::SimpleRandomSample, _)
    # TODO: results not matching for `sem`
    # (This note lives inside the body on purpose: a comment placed between the
    # docstring and the `function` line stops the docstring from attaching.)
    return DataFrame(mean = mean(x), sem = sem(x, design))
end

Expand Down
7 changes: 7 additions & 0 deletions src/svytotal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ function var_of_total(x::Symbol, design::SimpleRandomSample)
return design.popsize^2 * design.fpc / design.sampsize * var(design.data[!, x])
end

"""
    var_of_total(x::AbstractVector, design::SimpleRandomSample)

Inner method for `svyby`.

Return `var(x)` multiplied by `design.popsize^2 * design.fpc / design.sampsize`,
where `x` is supplied directly as a vector of values.
"""
function var_of_total(x::AbstractVector, design::SimpleRandomSample)
    # Same left-to-right evaluation as `popsize^2 * fpc / sampsize * var(x)`.
    scale = design.popsize^2 * design.fpc / design.sampsize
    return scale * var(x)
end

"""
    se_tot(x::Symbol, design::SimpleRandomSample)

Return the square root of `var_of_total(x, design)`; `x` is passed through
unchanged to `var_of_total`.
"""
se_tot(x::Symbol, design::SimpleRandomSample) = sqrt(var_of_total(x, design))
Expand Down

0 comments on commit f35be8a

Please sign in to comment.