Skip to content

Commit

Permalink
Merge pull request #95 from smishr/design_update_stratified
Browse files Browse the repository at this point in the history
StratifiedSample software eng edits, reorder, testing
  • Loading branch information
smishr authored Nov 28, 2022
2 parents 004eb07 + 0918e46 commit a4b9790
Show file tree
Hide file tree
Showing 11 changed files with 287 additions and 138 deletions.
254 changes: 192 additions & 62 deletions src/SurveyDesign.jl

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions src/dimnames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Get the dimensions of a `SurveyDesign`.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs; popsize =:fpc);
julia> dim(srs)
(200, 42)
Expand All @@ -23,7 +23,7 @@ Get the column names of a `SurveyDesign`.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs; popsize=:fpc);
julia> colnames(srs)
42-element Vector{String}:
Expand Down Expand Up @@ -60,7 +60,7 @@ Get the names of the rows and columns of a `SurveyDesign`.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> dimnames(srs)
2-element Vector{Vector{String}}:
Expand Down
15 changes: 15 additions & 0 deletions src/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@ function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
end

"`show` method for printing information about a `StratifiedSample` design"
function Base.show(io::IO, ::MIME"text/plain", design::StratifiedSample)
    # Header: the concrete design type in bold, followed by the data dimensions.
    printstyled(io, string(typeof(design)), ":\n"; bold=true)
    printstyled(io, "data: "; bold=true)
    nrows = size(design.data, 1)
    ncols = size(design.data, 2)
    println(io, nrows, "x", ncols, " DataFrame")

    # The stratification variable is a field of the design itself.
    printinfo(io, "strata", string(design.strata); newline=true)

    # The remaining design quantities are read from columns of `design.data`;
    # each is passed through `makeshort` (helper defined elsewhere in the package)
    # and printed under a label equal to the column name.
    for column in (:weights, :probs, :fpc, :popsize, :sampsize, :sampfraction)
        printinfo(io, string(column), makeshort(getproperty(design.data, column)))
    end

    # Last entry is printed with `newline=false`.
    printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
end

"`show` method for printing information about a survey design"
function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
type = typeof(design)
Expand Down
18 changes: 12 additions & 6 deletions src/svyby.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Generate subsets of a survey design.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs; popsize =:fpc);
julia> svyby(:api00, :cname, srs, svymean)
38×3 DataFrame
Expand All @@ -32,7 +32,7 @@ julia> svyby(:api00, :cname, srs, svymean)
23 rows omitted
```
"""
function svyby(formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = [])
function svyby(formula::Symbol, by::Symbol, design::SimpleRandomSample, func::Function, params = [])
# TODO: add functionality for `formula::AbstractVector`
gdf = groupby(design.data, by)
return combine(gdf, [formula, :weights] => ((a, b) -> func(a, design, b, params...)) => AsTable)
Expand All @@ -46,24 +46,30 @@ Generate subsets of a StratifiedSample.
```jldoctest
julia> apistrat = load_data("apistrat");
julia> strat = StratifiedSample(apistrat, :stype ; popsize = apistrat.fpc);
julia> strat = StratifiedSample(apistrat, :stype ; popsize =:fpc);
julia> svyby(:api00, :cname, strat, svymean)
40×3 DataFrame
Row │ cname domain_mean domain_mean_se
│ String15 Float64 Float64
Row │ cname domain_mean domain_mean_se
│ String15 Float64 Float64
─────┼─────────────────────────────────────────────
1 │ Los Angeles 633.511 21.3912
2 │ Ventura 707.172 31.6856
3 │ Kern 678.235 53.1337
4 │ San Diego 704.121 32.3311
5 │ San Bernardino 567.551 32.0866
6 │ Riverside 590.901 13.6463
7 │ Fresno 553.635 35.7614
8 │ Alameda 695.16 51.3053
⋮ │ ⋮ ⋮ ⋮
34 │ Santa Barbara 743.0 0.0
35 │ Siskiyou 780.0 0.0
36 │ Stanislaus 712.0 1.09858e-13
37 │ Napa 660.0 0.0
38 │ Mariposa 706.0 0.0
39 │ Mendocino 632.018 1.04942
40 │ Butte 627.0 0.0
31 rows omitted
25 rows omitted
```
"""
function svyby(formula::Symbol, by::Symbol, design::StratifiedSample, func::Function)
Expand Down
15 changes: 0 additions & 15 deletions src/svydesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,6 @@
svydesign
Type incorporating all necessary information to describe a survey design.
```jldoctest
julia> apistrat = load_data("apistrat");
julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc)
Survey Design:
variables: 200x45 DataFrame
id: 1
strata: E, E, E, ..., H
probs: 0.0226, 0.0226, 0.0226, ..., 0.0662
fpc:
popsize: 4421, 4421, 4421, ..., 755
sampsize: 200, 200, 200, ..., 200
nest: false
check_strat: true
```
"""
struct svydesign
Expand Down
25 changes: 1 addition & 24 deletions src/svyglm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,7 @@ end
"""
svyglm(formula, design, dist, link)
Fit Generalized Linear Models (GLMs) on `svydesign`.
```jldoctest
julia> apiclus1 = load_data("apiclus1");
julia> dclus1 = svydesign(id=:dnum, weights=:pw, data = apiclus1);
julia> svyglm(@formula(ell~meals),dclus1,Normal(),IdentityLink())
StatsModels.TableRegressionModel{GLM.GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, IdentityLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}}
ell ~ 1 + meals
Coefficients:
────────────────────────────────────────────────────────────────────────
Coef. Std. Error z Pr(>|z|) Lower 95% Upper 95%
────────────────────────────────────────────────────────────────────────
(Intercept) 6.86665 0.350512 19.59 <1e-84 6.17966 7.55364
meals 0.410511 0.00613985 66.86 <1e-99 0.398477 0.422545
────────────────────────────────────────────────────────────────────────
Degrees of Freedom: 6193.000324249264 (i.e. Null); 6192.000324249264 Residual
Null Deviance: 1.7556928968296547e6
Residual Deviance: 1.0196009035970895e6
AIC: 49195.42124574161
```
Fit Generalized Linear Models (GLMs) on `svydesign`.
"""
mutable struct svyglm
glm
Expand Down
6 changes: 3 additions & 3 deletions src/svyhist.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Calculate the number of bins to use in a histogram using the Sturges rule.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> sturges(srs, :enroll)
9
Expand All @@ -33,7 +33,7 @@ Calculate the number of bins to use in a histogram using the Freedman-Diaconis r
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> freedman_diaconis(srs, :enroll)
18
Expand Down Expand Up @@ -63,7 +63,7 @@ For the complete argument list see [Makie.hist](https://makie.juliaplots.org/sta
```@example histogram
apisrs = load_data("apisrs");
srs = SimpleRandomSample(apisrs; weights = :pw);
srs = SimpleRandomSample(apisrs;popsize=:fpc);
h = svyhist(srs, :enroll)
save("hist.png", h); nothing # hide
```
Expand Down
2 changes: 1 addition & 1 deletion src/svymean.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Compute the mean and SEM of the survey variable `x`.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> svymean(:enroll, srs)
1×2 DataFrame
Expand Down
3 changes: 1 addition & 2 deletions src/svyquantile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Estimate quantiles for `SurveyDesign`s.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> svyquantile(:enroll, srs, 0.5)
1×1 DataFrame
Expand All @@ -27,7 +27,6 @@ function svyquantile(var, design::StratifiedSample, q)
w = design.data.probs
df = DataFrame(tmp = quantile(Float32.(x), weights(w), q))
rename!(df, :tmp => Symbol(string(q) .* "th percentile"))

return df
end

Expand Down
2 changes: 1 addition & 1 deletion src/svytotal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Estimate the population total for the variable specified by `x`.
```jldoctest
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs; weights = :pw);
julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
julia> svytotal(:enroll, srs)
1×2 DataFrame
Expand Down
79 changes: 58 additions & 21 deletions test/SurveyDesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@
##############################
### sum of weights and probs condition check
apisrs = copy(apisrs_original)
@test_throws ErrorException SimpleRandomSample(apisrs, probs=fill(0.3, size(apisrs_original, 1)))
@test_throws ErrorException SimpleRandomSample(apisrs, weights=fill(0.3, size(apisrs_original, 1)))
apisrs = copy(apisrs_original)
@test_throws ErrorException SimpleRandomSample(apisrs, popsize=:fpc, probs=fill(0.3, size(apisrs_original, 1)))
@test_throws ErrorException SimpleRandomSample(apisrs, probs=fill(0.3, size(apisrs_original, 1)))
##############################
### weights only as Vector
apisrs = copy(apisrs_original)
Expand Down Expand Up @@ -101,31 +101,68 @@ end
# Load API datasets
apistrat_original = load_data("apistrat")
apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw
apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
##############################
### Valid type checking tests
apistrat = copy(apistrat_original)
strat_pop = StratifiedSample(apistrat, :stype; popsize=:fpc)
@test strat_pop.data.probs == 1 ./ strat_pop.data.weights

@test_throws ErrorException StratifiedSample(apistrat,:stype; popsize=-2.83, ignorefpc=true)
@test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=-300)
@test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=-2.8, ignorefpc=true)
@test_throws ErrorException StratifiedSample(apistrat,:stype; weights=50)
@test_throws ErrorException StratifiedSample(apistrat,:stype; probs=1)
##############################
### weights as Symbol
apistrat = copy(apistrat_original)
strat_wt = StratifiedSample(apistrat, :stype; weights=:pw)
@test strat_wt.data.probs == 1 ./ strat_wt.data.weights

apistrat3 = copy(apistrat_original)
strat_probs = StratifiedSample(apistrat3, :stype; probs=1 ./ apistrat3.pw)
### probs as Symbol
apistrat = copy(apistrat_original)
strat_probs = StratifiedSample(apistrat, :stype; probs=:derived_probs)
@test strat_probs.data.probs == 1 ./ strat_probs.data.weights

#see github issue for srs
# apistrat4 = copy(apistrat_original)
# strat_probs1 = StratifiedSample(apistrat4, :stype; probs=fill(0.3, size(apistrat4, 1)))
#@test strat_probs1.data.probs == 1 ./ strat_probs1.data.weights

apistrat5 = copy(apistrat_original)
strat_popsize = StratifiedSample(apistrat5, :stype; popsize=apistrat5.fpc)
@test strat_popsize.data.probs == 1 ./ strat_popsize.data.weights

# To edit
# strat_popsize_fpc = StratifiedSample(apistrat, :stype; popsize= apistrat.fpc, ignorefpc = true)
# strat_new = StratifiedSample(apistrat, :stype; popsize= apistrat.pw, sampsize = apistrat.fpc) #should throw error because sampsize > popsize
### weights as Vector{<:Real}
apistrat = copy(apistrat_original)
strat_wt = StratifiedSample(apistrat, :stype; weights=apistrat.pw)
@test strat_wt.data.probs == 1 ./ strat_wt.data.weights
### probs as Vector{<:Real}
apistrat = copy(apistrat_original)
strat_probs = StratifiedSample(apistrat, :stype; probs=apistrat.derived_probs)
@test strat_probs.data.probs == 1 ./ strat_probs.data.weights
##############################
### popsize as Symbol
apistrat = copy(apistrat_original)
strat_pop = StratifiedSample(apistrat, :stype; popsize=:fpc)
@test strat_pop.data.probs == 1 ./ strat_pop.data.weights
### popsize given as Vector (should throw an error for now: passing a Vector directly as popsize is not yet implemented)
apistrat = copy(apistrat_original)
@test_throws ErrorException StratifiedSample(apistrat,:stype; popsize=apistrat.fpc)
##############################
### sampsize given as Symbol
apistrat = copy(apistrat_original)
strat_sampsize_sym = StratifiedSample(apistrat,:stype; sampsize=:derived_sampsize, weights=:pw)
@test strat_sampsize_sym.data.weights == 1 ./ strat_sampsize_sym.data.probs # weights should be inverse of probs
### sampsize given as symbol without weights or probs, and popsize not given - raise error
apistrat = copy(apistrat_original)
@test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=:derived_sampsize)
##############################
### both weights and probs given
# If weights given, probs is superfluous
apistrat = copy(apistrat_original)
strat_weights_probs = StratifiedSample(apistrat,:stype; weights=:pw, probs=:derived_probs)
strat_weights_probs = StratifiedSample(apistrat,:stype; weights=:pw, probs=:pw)
##############################
### ignorefpc test (Modify if ignorefpc changed)
apistrat = copy(apistrat_original)
strat_ignorefpc=StratifiedSample(apistrat,:stype; popsize=:fpc, ignorefpc=true)
@test strat_ignorefpc.data.probs == 1 ./ strat_ignorefpc.data.weights
##############################
# For now, no sum checks on probs and weights for StratifiedSample (unlike SRS)
apistrat = copy(apistrat_original)
strat_probs1 = StratifiedSample(apistrat, :stype; probs=fill(0.3, size(apistrat, 1)))
@test strat_probs1.data.probs == 1 ./ strat_probs1.data.weights
##############################
#should throw error because sampsize > popsize
apistrat = copy(apistrat_original)
@test_throws ErrorException StratifiedSample(apistrat, :stype; popsize= :pw, sampsize=:fpc)
end

##### SurveyDesign tests
Expand Down

0 comments on commit a4b9790

Please sign in to comment.