diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl index 2efcab09..aa28973b 100644 --- a/src/SurveyDesign.jl +++ b/src/SurveyDesign.jl @@ -14,25 +14,46 @@ abstract type AbstractSurveyDesign end """ SimpleRandomSample <: AbstractSurveyDesign - Survey design sampled by simple random sampling. - # Required arguments: - data - This is the survey dataset loaded as a DataFrame in memory. - Note: Keeping with Julia conventions, original data object - is modified, not copied. Be careful - # Optional arguments: - sampsize - Sample size of the survey, given as Symbol name of - column in `data`, an `Unsigned` integer, or a Vector - popsize - The (expected) population size of survey, given as Symbol - name of column in `data`, an `Unsigned` integer, or a Vector - weights - Sampling weights, passed as Symbol or Vector - probs - Sampling probabilities, passed as Symbol or Vector - ignorefpc- Ignore finite population correction and assume all weights equal to 1.0 - - Precedence order of using `popsize`, `weights` and `probs` is `popsize` > `weights` > `probs` - Eg. if `popsize` given then assumed ground truth over `weights` or `probs` +Survey design sampled by simple random sampling. +# Required arguments: +data - This is the survey dataset loaded as a DataFrame in memory. + Note: Keeping with Julia conventions, original data object + is modified, not copied. Be careful +# Optional arguments: +sampsize - Sample size of the survey, given as Symbol name of + column in `data`, an `Unsigned` integer, or a Vector +popsize - The (expected) population size of survey, given as Symbol + name of column in `data`, an `Unsigned` integer, or a Vector +weights - Sampling weights, passed as Symbol or Vector +probs - Sampling probabilities, passed as Symbol or Vector +ignorefpc- Ignore finite population correction and assume all weights equal to 1.0 + +Precedence order of using `popsize`, `weights` and `probs` is `popsize` > `weights` > `probs` +Eg. 
if `popsize` given then assumed ground truth over `weights` or `probs` + +If `popsize` not given, `weights` or `probs` must be given, so that in combination +with `sampsize`, `popsize` can be calculated. + +```jldoctest +julia> apisrs_original = load_data("apisrs"); + +julia> apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw; + +julia> apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1)); + +julia> srs = SimpleRandomSample(apisrs_original; popsize=:fpc); - If `popsize` not given, `weights` or `probs` must be given, so that in combination - with `sampsize`, `popsize` can be calculated. +julia> srs +SimpleRandomSample: +data: 200x44 DataFrame +weights: 31.0, 31.0, 31.0, ..., 31.0 +probs: 0.0323, 0.0323, 0.0323, ..., 0.0323 +fpc: 6194, 6194, 6194, ..., 6194 +popsize: 6194 +sampsize: 200 +sampfraction: 0.0323 +ignorefpc: false +``` """ struct SimpleRandomSample <: AbstractSurveyDesign data::AbstractDataFrame @@ -73,9 +94,9 @@ struct SimpleRandomSample <: AbstractSurveyDesign end # If weights/probs vector not numeric/real, ie. string column passed for weights, then raise error if !isa(weights, Union{Nothing,Vector{<:Real}}) - error("Weights should be Vector{<:Real}. You passed $(typeof(weights))") + error("weights should be Vector{<:Real}. You passed $(typeof(weights))") elseif !isa(probs, Union{Nothing,Vector{<:Real}}) - error("Sampling probabilities should be Vector{<:Real}. You passed $(typeof(probs))") + error("sampling probabilities should be Vector{<:Real}. 
You passed $(typeof(probs))") end # If popsize given as Symbol or Vector, check all records equal if isa(popsize, Symbol) @@ -106,10 +127,10 @@ struct SimpleRandomSample <: AbstractSurveyDesign probs = 1 ./ weights data[!, :probs] = probs end - # popsize must be nothing or <:Integer by now + # popsize must be nothing or <:Unsigned by now if isnothing(popsize) # If popsize not given, fallback to weights, probs and sampsize to estimate `popsize` - @warn "Using weights/probs and sampsize to estimate `popsize`" + @warn "popsize not given. using weights/probs and sampsize to estimate `popsize`" # Check that all weights (or probs if weights not given) are equal, as SRS is by definition equi-weighted if typeof(weights) <: Vector{<:Real} if !all(w -> w == first(weights), weights) @@ -125,14 +146,16 @@ struct SimpleRandomSample <: AbstractSurveyDesign end # Estimate population size popsize = round(sampsize * first(weights)) |> UInt - if sampsize > popsize - error("population size was estimated to be greater than given sampsize. Please check input arguments.") - end elseif typeof(popsize) <: Unsigned weights = fill(popsize / sampsize, nrow(data)) # If popsize is given, weights vector is made concordant with popsize and sampsize, regardless of given weights argument + probs = 1 ./ weights else error("something went wrong, please check validity of inputs.") end + # If sampsize is greater than popsize, then illogical arguments were specified. + if sampsize > popsize + error("population size was estimated to be less than given sampsize. Please check input arguments.") + end # If ignorefpc then set weights to 1 ??
# TODO: This works under some cases, but should find better way to process ignoring fpc if ignorefpc @@ -155,7 +178,7 @@ struct SimpleRandomSample <: AbstractSurveyDesign error("sum of inverse sampling probabilities should be equal to `sampsize` for `SimpleRandomSample` with `ignorefpc`") elseif !ignorefpc @show sum(1 ./ probs) - error("Sum of inverse of sampling probabilities must be equal to `popsize` for Simple Random Sample") + error("sum of inverse of sampling probabilities must be equal to `popsize` for Simple Random Sample") end end ## Set remaining parts of data structure @@ -177,76 +200,183 @@ end """ StratifiedSample <: AbstractSurveyDesign - Survey design sampled by stratification. +Survey design sampled by stratification. + +`strata` must be specified as a Symbol name of a column in `data`. + +# Required arguments: +data - This is the survey dataset loaded as a DataFrame in memory. + Note: Keeping with Julia conventions, original data object + is modified, not copied. Be careful +strata - Column that is the stratification variable. +# Optional arguments: +sampsize - Sample size of the survey, given as Symbol name of + column in `data`, an `Unsigned` integer, or a Vector +popsize - The (expected) population size of survey, given as Symbol + name of column in `data`, an `Unsigned` integer, or a Vector +weights - Sampling weights, passed as Symbol or Vector +probs - Sampling probabilities, passed as Symbol or Vector +ignorefpc- Ignore finite population correction and assume all weights equal to 1.0 + +Precedence order of using `popsize`, `weights` and `probs` is `popsize` > `weights` > `probs` +Eg. if `popsize` given then assumed ground truth over `weights` or `probs` + +If `popsize` not given, `weights` or `probs` must be given, so that in combination +with `sampsize`, `popsize` can be calculated. 
+ +```jldoctest +julia> apistrat_original = load_data("apistrat"); + +julia> apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw; + +julia> apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw; + +julia> strat_pop = StratifiedSample(apistrat_original, :stype; popsize=:fpc); + +julia> strat_pop +StratifiedSample: +data: 200x47 DataFrame +strata: stype +weights: 44.2, 44.2, 44.2, ..., 15.1 +probs: 0.0226, 0.0226, 0.0226, ..., 0.0662 +fpc: 0.977, 0.977, 0.977, ..., 0.934 +popsize: 4421, 4421, 4421, ..., 755 +sampsize: 100, 100, 100, ..., 50 +sampfraction: 0.0226, 0.0226, 0.0226, ..., 0.0662 +ignorefpc: false +``` """ struct StratifiedSample <: AbstractSurveyDesign data::AbstractDataFrame strata::Symbol - sampsize::Union{Nothing,Vector{Real}} - popsize::Union{Nothing,Vector{Real}} - sampfraction::Vector{Real} - fpc::Vector{Real} ignorefpc::Bool function StratifiedSample(data::AbstractDataFrame, strata::Symbol; popsize=nothing, - sampsize=transform(groupby(data, strata), nrow => :counts).counts, + sampsize=nothing, weights=nothing, probs=nothing, ignorefpc=false ) - if isa(popsize, Symbol) - popsize = data[!, popsize] + # Only valid argument types given to constructor + argtypes_weights = Union{Nothing,Symbol,Vector{<:Real}} + argtypes_probs = Union{Nothing,Symbol,Vector{<:Real}} + argtypes_popsize = Union{Nothing,Symbol} + argtypes_sampsize = Union{Nothing,Symbol} + # If any invalid type raise error + if !(isa(weights, argtypes_weights)) + error("invalid type of argument given for `weights` argument") + elseif !(isa(probs, argtypes_probs)) + error("invalid type of argument given for `probs` argument") + elseif !(isa(popsize, argtypes_popsize)) + error("invalid type of argument given for `popsize` argument. Please give Symbol of the column in data") + elseif !(isa(sampsize, argtypes_sampsize)) + error("invalid type of argument given for `sampsize` argument. 
Please give Symbol of the column in data") end + # Store the iterator over each strata, as used multiple times + data_groupedby_strata = groupby(data, strata) + # If any of weights or probs given as Symbol, find the corresponding column in `data` if isa(weights, Symbol) - weights = data[!, weights] + for each_strata in keys(data_groupedby_strata) + if !all(w -> w == first(data_groupedby_strata[each_strata][!, weights]), data_groupedby_strata[each_strata][!, weights]) + error("sampling weights within each strata must be equal in StratifiedSample") + end + end + # original_weights_colname = copy(weights) + weights = data[!, weights] # If all good with weights column, then store it as Vector end if isa(probs, Symbol) - probs = data[!, probs] + for each_strata in keys(data_groupedby_strata) + if !all(p -> p == first(data_groupedby_strata[each_strata][!, probs]), data_groupedby_strata[each_strata][!, probs]) + error("sampling probabilities within each strata must be equal in StratifiedSample") + end + end + # original_probs_colname = copy(probs) + probs = data[!, probs] # If all good with probs column, then store it as Vector end - - if ignorefpc - # TODO: change what happens if `ignorepfc == true` or if the user only - # specifies `data` - @warn "assuming equal weights" - weights = ones(nrow(data)) + # If weights/probs vector not numeric/real, ie. string column passed for weights, then raise error + if !isa(weights, Union{Nothing,Vector{<:Real}}) + error("weights should be Vector{<:Real}. You passed $(typeof(weights))") + elseif !isa(probs, Union{Nothing,Vector{<:Real}}) + error("sampling probabilities should be Vector{<:Real}. 
You passed $(typeof(probs))") end - - # set population size if it is not given; `weights` and `sampsize` must be given + # If popsize given as Symbol or Vector, check all records equal in each strata + if isa(popsize, Symbol) + for each_strata in keys(data_groupedby_strata) + if !all(w -> w == first(data_groupedby_strata[each_strata][!, popsize]), data_groupedby_strata[each_strata][!, popsize]) + error("popsize must be same for all observations within each strata in StratifiedSample") + end + end + # original_popsize_colname = copy(popsize) + popsize = data[!, popsize] + end + # If sampsize given as Symbol or Vector, check all records equal + if isa(sampsize, Symbol) + if isnothing(popsize) && isnothing(weights) && isnothing(probs) + error("if sampsize given, and popsize not given, then weights or probs must given to calculate popsize") + end + for each_strata in keys(data_groupedby_strata) + if !all(w -> w == first(data_groupedby_strata[each_strata][!, sampsize]), data_groupedby_strata[each_strata][!, sampsize]) + error("sampsize must be same for all observations within each strata in StratifiedSample") + end + end + # original_sampsize_colname = copy(sampsize) + sampsize = data[!, sampsize] + # If sampsize column not provided in constructor call, set it as nrow of strata + elseif isnothing(sampsize) + sampsize = transform(groupby(data, strata), nrow => :counts).counts + end + # If both `weights` and `probs` given, then `weights` is assumed to be ground truth for probs. + if !isnothing(weights) && !isnothing(probs) + probs = 1 ./ weights + data[!, :probs] = probs + end + # `popsize` is either nothing or a Vector{<:Real} by now if isnothing(popsize) - # TODO: add probability weights if `weights` is not `nothing` + # If popsize not given, fallback to weights, probs and sampsize to estimate `popsize` + @warn "popsize not given. 
using weights/probs and sampsize to estimate `popsize` for StratifiedSample" + # Derive weights from probs when probs are given; otherwise weights must already be a numeric Vector if typeof(probs) <: Vector{<:Real} weights = 1 ./ probs + elseif !(typeof(weights) <: Vector{<:Real}) + error("either weights or probs must be given if `popsize` not given") end - # estimate population size + # Estimate population size popsize = sampsize .* weights - - if sampsize > popsize - error("sample size cannot be greater than population size") - end - elseif typeof(popsize) <: Vector{<:Real} - # TODO: change `elseif` condition - weights = popsize ./ sampsize # expansion estimator - # TODO: add probability weights + elseif typeof(popsize) <: Vector{<:Real} # Still need to check if the provided Column is of <:Real + # If popsize is given, weights and probs made concordant with popsize and sampsize, regardless of supplied arguments + weights = popsize ./ sampsize + probs = 1 ./ weights else - error("either population size or frequency/probability weights must be specified") + error("something went wrong. Please check validity of inputs.") + end + # If sampsize is greater than popsize, then illogical arguments were specified. + if any(sampsize .> popsize) + @show sampsize, popsize + error("population sizes were estimated to be less than sampsize. please check input arguments.") + end + # If ignorefpc then set weights to 1 ?? + # TODO: This works under some cases, but should find better way to process ignoring fpc + if ignorefpc + @warn "assuming all weights are equal to 1.0" + weights = ones(nrow(data)) + probs = 1 ./ weights end + ## Set remaining parts of data structure # set sampling fraction sampfraction = sampsize ./ popsize # set fpc fpc = ignorefpc ?
fill(1, size(data, 1)) : 1 .- (sampsize ./ popsize) # add columns for frequency and probability weights to `data` - if !isnothing(probs) - data[!, :probs] = probs - data[!, :weights] = 1 ./ data[!, :probs] - else - data[!, :weights] = weights - data[!, :probs] = 1 ./ data[!, :weights] + data[!, :weights] = weights + if isnothing(probs) + probs = 1 ./ data[!, :weights] end + data[!, :probs] = probs data[!, :sampsize] = sampsize data[!, :popsize] = popsize data[!, :fpc] = fpc data[!, :sampfraction] = sampfraction - new(data, strata, sampsize, popsize, sampfraction, fpc, ignorefpc) + new(data, strata, ignorefpc) end end diff --git a/src/dimnames.jl b/src/dimnames.jl index a79d6378..23f945a7 100644 --- a/src/dimnames.jl +++ b/src/dimnames.jl @@ -6,7 +6,7 @@ Get the dimensions of a `SurveyDesign`. ```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs; popsize =:fpc); julia> dim(srs) (200, 42) @@ -23,7 +23,7 @@ Get the column names of a `SurveyDesign`. ```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs; popsize=:fpc); julia> colnames(srs) 42-element Vector{String}: @@ -60,7 +60,7 @@ Get the names of the rows and columns of a `SurveyDesign`. 
```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); julia> dimnames(srs) 2-element Vector{Vector{String}}: diff --git a/src/show.jl b/src/show.jl index b3eb7876..44dae259 100644 --- a/src/show.jl +++ b/src/show.jl @@ -33,6 +33,21 @@ function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign) printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false) end +function Base.show(io::IO, ::MIME"text/plain", design::StratifiedSample) + type = typeof(design) + printstyled(io, "$type:\n"; bold=true) + printstyled(io, "data: "; bold=true) + println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame") + printinfo(io, "strata", string(design.strata); newline=true) + printinfo(io, "weights", makeshort(design.data.weights)) + printinfo(io, "probs", makeshort(design.data.probs)) + printinfo(io, "fpc", makeshort(design.data.fpc)) + printinfo(io, "popsize", makeshort(design.data.popsize)) + printinfo(io, "sampsize", makeshort(design.data.sampsize)) + printinfo(io, "sampfraction", makeshort(design.data.sampfraction)) + printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false) +end + "`show` method for printing information about a survey design" function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) type = typeof(design) diff --git a/src/svyby.jl b/src/svyby.jl index 1eda74eb..d149de59 100644 --- a/src/svyby.jl +++ b/src/svyby.jl @@ -6,7 +6,7 @@ Generate subsets of a survey design. 
```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs; popsize =:fpc); julia> svyby(:api00, :cname, srs, svymean) 38×3 DataFrame @@ -32,7 +32,7 @@ julia> svyby(:api00, :cname, srs, svymean) 23 rows omitted ``` """ -function svyby(formula::Symbol, by::Symbol, design::AbstractSurveyDesign, func::Function, params = []) +function svyby(formula::Symbol, by::Symbol, design::SimpleRandomSample, func::Function, params = []) # TODO: add functionality for `formula::AbstractVector` gdf = groupby(design.data, by) return combine(gdf, [formula, :weights] => ((a, b) -> func(a, design, b, params...)) => AsTable) @@ -46,24 +46,30 @@ Generate subsets of a StratifiedSample. ```jldoctest julia> apistrat = load_data("apistrat"); -julia> strat = StratifiedSample(apistrat, :stype ; popsize = apistrat.fpc); +julia> strat = StratifiedSample(apistrat, :stype ; popsize =:fpc); julia> svyby(:api00, :cname, strat, svymean) 40×3 DataFrame - Row │ cname domain_mean domain_mean_se - │ String15 Float64 Float64 + Row │ cname domain_mean domain_mean_se + │ String15 Float64 Float64 ─────┼───────────────────────────────────────────── 1 │ Los Angeles 633.511 21.3912 2 │ Ventura 707.172 31.6856 3 │ Kern 678.235 53.1337 4 │ San Diego 704.121 32.3311 5 │ San Bernardino 567.551 32.0866 + 6 │ Riverside 590.901 13.6463 + 7 │ Fresno 553.635 35.7614 + 8 │ Alameda 695.16 51.3053 ⋮ │ ⋮ ⋮ ⋮ + 34 │ Santa Barbara 743.0 0.0 + 35 │ Siskiyou 780.0 0.0 + 36 │ Stanislaus 712.0 1.09858e-13 37 │ Napa 660.0 0.0 38 │ Mariposa 706.0 0.0 39 │ Mendocino 632.018 1.04942 40 │ Butte 627.0 0.0 - 31 rows omitted + 25 rows omitted ``` """ function svyby(formula::Symbol, by::Symbol, design::StratifiedSample, func::Function) diff --git a/src/svydesign.jl b/src/svydesign.jl index 894920cf..18020f51 100644 --- a/src/svydesign.jl +++ b/src/svydesign.jl @@ -2,21 +2,6 @@ svydesign Type incorporating all necessary information to describe a survey 
design. - -```jldoctest -julia> apistrat = load_data("apistrat"); - -julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc) -Survey Design: -variables: 200x45 DataFrame -id: 1 -strata: E, E, E, ..., H -probs: 0.0226, 0.0226, 0.0226, ..., 0.0662 -fpc: - popsize: 4421, 4421, 4421, ..., 755 - sampsize: 200, 200, 200, ..., 200 -nest: false -check_strat: true ``` """ struct svydesign diff --git a/src/svyglm.jl b/src/svyglm.jl index 485bcc41..abd87361 100644 --- a/src/svyglm.jl +++ b/src/svyglm.jl @@ -11,30 +11,7 @@ end """ svyglm(formula, design, dist, link) -Fit Generalized Linear Models (GLMs) on `svydesign`. - -```jldoctest -julia> apiclus1 = load_data("apiclus1"); - -julia> dclus1 = svydesign(id=:dnum, weights=:pw, data = apiclus1); - -julia> svyglm(@formula(ell~meals),dclus1,Normal(),IdentityLink()) -StatsModels.TableRegressionModel{GLM.GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, IdentityLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}} - -ell ~ 1 + meals - -Coefficients: -──────────────────────────────────────────────────────────────────────── - Coef. Std. Error z Pr(>|z|) Lower 95% Upper 95% -──────────────────────────────────────────────────────────────────────── -(Intercept) 6.86665 0.350512 19.59 <1e-84 6.17966 7.55364 -meals 0.410511 0.00613985 66.86 <1e-99 0.398477 0.422545 -──────────────────────────────────────────────────────────────────────── -Degrees of Freedom: 6193.000324249264 (i.e. Null); 6192.000324249264 Residual -Null Deviance: 1.7556928968296547e6 -Residual Deviance: 1.0196009035970895e6 -AIC: 49195.42124574161 -``` + Fit Generalized Linear Models (GLMs) on `svydesign`. """ mutable struct svyglm glm diff --git a/src/svyhist.jl b/src/svyhist.jl index decabaef..ef05bc53 100644 --- a/src/svyhist.jl +++ b/src/svyhist.jl @@ -12,7 +12,7 @@ Calculate the number of bins to use in a histogram using the Sturges rule. 
```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); julia> sturges(srs, :enroll) 9 @@ -33,7 +33,7 @@ Calculate the number of bins to use in a histogram using the Freedman-Diaconis r ```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); julia> freedman_diaconis(srs, :enroll) 18 @@ -63,7 +63,7 @@ For the complete argument list see [Makie.hist](https://makie.juliaplots.org/sta ```@example histogram apisrs = load_data("apisrs"); -srs = SimpleRandomSample(apisrs; weights = :pw); +srs = SimpleRandomSample(apisrs;popsize=:fpc); h = svyhist(srs, :enroll) save("hist.png", h); nothing # hide ``` diff --git a/src/svymean.jl b/src/svymean.jl index 93b742bc..5ab19d51 100644 --- a/src/svymean.jl +++ b/src/svymean.jl @@ -34,7 +34,7 @@ Compute the mean and SEM of the survey variable `x`. ```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); julia> svymean(:enroll, srs) 1×2 DataFrame diff --git a/src/svyquantile.jl b/src/svyquantile.jl index ef952a3c..7ad5ca5b 100644 --- a/src/svyquantile.jl +++ b/src/svyquantile.jl @@ -5,7 +5,7 @@ Estimate quantiles for `SurveyDesign`s. 
```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); julia> svyquantile(:enroll, srs, 0.5) 1×1 DataFrame @@ -27,7 +27,6 @@ function svyquantile(var, design::StratifiedSample, q) w = design.data.probs df = DataFrame(tmp = quantile(Float32.(x), weights(w), q)) rename!(df, :tmp => Symbol(string(q) .* "th percentile")) - return df end diff --git a/src/svytotal.jl b/src/svytotal.jl index c46c851f..a5222fb9 100644 --- a/src/svytotal.jl +++ b/src/svytotal.jl @@ -20,7 +20,7 @@ Estimate the population total for the variable specified by `x`. ```jldoctest julia> apisrs = load_data("apisrs"); -julia> srs = SimpleRandomSample(apisrs; weights = :pw); +julia> srs = SimpleRandomSample(apisrs;popsize=:fpc); julia> svytotal(:enroll, srs) 1×2 DataFrame diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl index bf82b5bb..f3e74014 100644 --- a/test/SurveyDesign.jl +++ b/test/SurveyDesign.jl @@ -60,9 +60,9 @@ ############################## ### sum of weights and probs condition check apisrs = copy(apisrs_original) - @test_throws ErrorException SimpleRandomSample(apisrs, probs=fill(0.3, size(apisrs_original, 1))) + @test_throws ErrorException SimpleRandomSample(apisrs, weights=fill(0.3, size(apisrs_original, 1))) apisrs = copy(apisrs_original) - @test_throws ErrorException SimpleRandomSample(apisrs, popsize=:fpc, probs=fill(0.3, size(apisrs_original, 1))) + @test_throws ErrorException SimpleRandomSample(apisrs, probs=fill(0.3, size(apisrs_original, 1))) ############################## ### weights only as Vector apisrs = copy(apisrs_original) @@ -101,31 +101,68 @@ end # Load API datasets apistrat_original = load_data("apistrat") apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw + apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw ############################## + ### Valid type checking tests apistrat = copy(apistrat_original) 
- strat_pop = StratifiedSample(apistrat, :stype; popsize=:fpc) - @test strat_pop.data.probs == 1 ./ strat_pop.data.weights - + @test_throws ErrorException StratifiedSample(apistrat,:stype; popsize=-2.83, ignorefpc=true) + @test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=-300) + @test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=-2.8, ignorefpc=true) + @test_throws ErrorException StratifiedSample(apistrat,:stype; weights=50) + @test_throws ErrorException StratifiedSample(apistrat,:stype; probs=1) + ############################## + ### weights as Symbol apistrat = copy(apistrat_original) strat_wt = StratifiedSample(apistrat, :stype; weights=:pw) @test strat_wt.data.probs == 1 ./ strat_wt.data.weights - - apistrat3 = copy(apistrat_original) - strat_probs = StratifiedSample(apistrat3, :stype; probs=1 ./ apistrat3.pw) + ### probs as Symbol + apistrat = copy(apistrat_original) + strat_probs = StratifiedSample(apistrat, :stype; probs=:derived_probs) @test strat_probs.data.probs == 1 ./ strat_probs.data.weights - - #see github issue for srs - # apistrat4 = copy(apistrat_original) - # strat_probs1 = StratifiedSample(apistrat4, :stype; probs=fill(0.3, size(apistrat4, 1))) - #@test strat_probs1.data.probs == 1 ./ strat_probs1.data.weights - - apistrat5 = copy(apistrat_original) - strat_popsize = StratifiedSample(apistrat5, :stype; popsize=apistrat5.fpc) - @test strat_popsize.data.probs == 1 ./ strat_popsize.data.weights - - # To edit - # strat_popsize_fpc = StratifiedSample(apistrat, :stype; popsize= apistrat.fpc, ignorefpc = true) - # strat_new = StratifiedSample(apistrat, :stype; popsize= apistrat.pw, sampsize = apistrat.fpc) #should throw error because sampsize > popsize + ### weights as Vector{<:Real} + apistrat = copy(apistrat_original) + strat_wt = StratifiedSample(apistrat, :stype; weights=apistrat.pw) + @test strat_wt.data.probs == 1 ./ strat_wt.data.weights + ### probs as Vector{<:Real} + apistrat = copy(apistrat_original) + 
strat_probs = StratifiedSample(apistrat, :stype; probs=apistrat.derived_probs) + @test strat_probs.data.probs == 1 ./ strat_probs.data.weights + ############################## + ### popsize as Symbol + apistrat = copy(apistrat_original) + strat_pop = StratifiedSample(apistrat, :stype; popsize=:fpc) + @test strat_pop.data.probs == 1 ./ strat_pop.data.weights + ### popsize given as Vector (should give error for now, not implemented Vector input directly for popsize) + apistrat = copy(apistrat_original) + @test_throws ErrorException StratifiedSample(apistrat,:stype; popsize=apistrat.fpc) + ############################## + ### sampsize given as Symbol + apistrat = copy(apistrat_original) + strat_sampsize_sym = StratifiedSample(apistrat,:stype; sampsize=:derived_sampsize, weights=:pw) + @test strat_sampsize_sym.data.weights == 1 ./ strat_sampsize_sym.data.probs # weights should be inverse of probs + ### sampsize given as symbol without weights or probs, and popsize not given - raise error + apistrat = copy(apistrat_original) + @test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=:derived_sampsize) + ############################## + ### both weights and probs given + # If weights given, probs is superfluous + apistrat = copy(apistrat_original) + strat_weights_probs = StratifiedSample(apistrat,:stype; weights=:pw, probs=:derived_probs) + strat_weights_probs = StratifiedSample(apistrat,:stype; weights=:pw, probs=:pw) + ############################## + ### ignorefpc test (Modify if ignorefpc changed) + apistrat = copy(apistrat_original) + strat_ignorefpc=StratifiedSample(apistrat,:stype; popsize=:fpc, ignorefpc=true) + @test strat_ignorefpc.data.probs == 1 ./ strat_ignorefpc.data.weights + ############################## + # For now, no sum checks on probs and weights for StratifiedSample (unlike SRS) + apistrat = copy(apistrat_original) + strat_probs1 = StratifiedSample(apistrat, :stype; probs=fill(0.3, size(apistrat, 1))) + @test strat_probs1.data.probs 
== 1 ./ strat_probs1.data.weights + ############################## + #should throw error because sampsize > popsize + apistrat = copy(apistrat_original) + @test_throws ErrorException StratifiedSample(apistrat, :stype; popsize= :pw, sampsize=:fpc) end ##### SurveyDesign tests