diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl index 928aa6a1..2efcab09 100644 --- a/src/SurveyDesign.jl +++ b/src/SurveyDesign.jl @@ -15,36 +15,102 @@ abstract type AbstractSurveyDesign end SimpleRandomSample <: AbstractSurveyDesign Survey design sampled by simple random sampling. + # Required arguments: + data - This is the survey dataset loaded as a DataFrame in memory. + Note: Keeping with Julia conventions, original data object + is modified, not copied. Be careful + # Optional arguments: + sampsize - Sample size of the survey, given as Symbol name of + column in `data`, an `Unsigned` integer, or a Vector + popsize - The (expected) population size of survey, given as Symbol + name of column in `data`, an `Unsigned` integer, or a Vector + weights - Sampling weights, passed as Symbol or Vector + probs - Sampling probabilities, passed as Symbol or Vector + ignorefpc- Ignore finite population correction and assume all weights equal to 1.0 + + Precedence order of using `popsize`, `weights` and `probs` is `popsize` > `weights` > `probs` + Eg. if `popsize` given then assumed ground truth over `weights` or `probs` + + If `popsize` not given, `weights` or `probs` must be given, so that in combination + with `sampsize`, `popsize` can be calculated. 
""" struct SimpleRandomSample <: AbstractSurveyDesign data::AbstractDataFrame - sampsize::Union{Nothing,Unsigned} - popsize::Union{Nothing,Unsigned} - sampfraction::Real - fpc::Real + sampsize::Union{Unsigned,Nothing} + popsize::Union{Unsigned,Nothing} + sampfraction::Float64 + fpc::Float64 ignorefpc::Bool function SimpleRandomSample(data::AbstractDataFrame; popsize=nothing, - sampsize=nrow(data), + sampsize=nrow(data) |> UInt, weights=nothing, probs=nothing, ignorefpc=false ) + # Only valid argument types given to constructor + argtypes_weights = Union{Nothing,Symbol,Vector{<:Real}} + argtypes_probs = Union{Nothing,Symbol,Vector{<:Real}} + argtypes_popsize = Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}} + argtypes_sampsize = Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}} + # If any invalid type raise error + if !(isa(weights, argtypes_weights)) + error("invalid type of argument given for `weights` argument") + elseif !(isa(probs, argtypes_probs)) + error("invalid type of argument given for `probs` argument") + elseif !(isa(popsize, argtypes_popsize)) + error("invalid type of argument given for `popsize` argument") + elseif !(isa(sampsize, argtypes_sampsize)) + error("invalid type of argument given for `sampsize` argument") + end + # If any of weights or probs given as Symbol, + # find the corresponding column in `data` if isa(weights, Symbol) weights = data[!, weights] end if isa(probs, Symbol) probs = data[!, probs] end - - if ignorefpc - @warn "assuming all weights are equal to 1.0" - weights = ones(nrow(data)) + # If weights/probs vector not numeric/real, ie. string column passed for weights, then raise error + if !isa(weights, Union{Nothing,Vector{<:Real}}) + error("Weights should be Vector{<:Real}. You passed $(typeof(weights))") + elseif !isa(probs, Union{Nothing,Vector{<:Real}}) + error("Sampling probabilities should be Vector{<:Real}. 
You passed $(typeof(probs))") end - - # set population size if it is not given; `weights` and `sampsize` must be given + # If popsize given as Symbol or Vector, check all records equal + if isa(popsize, Symbol) + if !all(w -> w == first(data[!, popsize]), data[!, popsize]) + error("popsize must be same for all observations in Simple Random Sample") + end + popsize = first(data[!, popsize]) |> UInt + elseif isa(popsize, Vector{<:Real}) + if !all(w -> w == first(popsize), popsize) + error("popsize must be same for all observations in Simple Random Sample") + end + popsize = first(popsize) |> UInt + end + # If sampsize given as Symbol or Vector, check all records equal + if isa(sampsize, Symbol) + if !all(w -> w == first(data[!, sampsize]), data[!, sampsize]) + error("sampsize must be same for all observations in Simple Random Sample") + end + sampsize = first(data[!, sampsize]) |> UInt + elseif isa(sampsize, Vector{<:Real}) + if !all(w -> w == first(sampsize), sampsize) + error("sampsize must be same for all observations in Simple Random Sample") + end + sampsize = first(sampsize) |> UInt + end + # If both `weights` and `probs` given, then `weights` is assumed to be ground truth for probs. 
+ if !isnothing(weights) && !isnothing(probs) + probs = 1 ./ weights + data[!, :probs] = probs + end + # popsize must be nothing or <:Integer by now if isnothing(popsize) - # check that all weights are equal (SRS is by definition equi-weighted) + # If popsize not given, fallback to weights, probs and sampsize to estimate `popsize` + @warn "Using weights/probs and sampsize to estimate `popsize`" + # Check that all weights (or probs if weights not given) are equal, as SRS is by definition equi-weighted if typeof(weights) <: Vector{<:Real} if !all(w -> w == first(weights), weights) error("all frequency weights must be equal for Simple Random Sample") @@ -53,23 +119,46 @@ struct SimpleRandomSample <: AbstractSurveyDesign if !all(p -> p == first(probs), probs) error("all probability weights must be equal for Simple Random Sample") end - weights = 1 / probs + weights = 1 ./ probs + else + error("either weights or probs must be given if `popsize` not given") end - # estimate population size + # Estimate population size popsize = round(sampsize * first(weights)) |> UInt - # @show popsize, sampsize # Check this Line TODO if sampsize > popsize - error("sample size cannot be greater than population size") + error("population size was estimated to be greater than given sampsize. Please check input arguments.") end - elseif typeof(popsize) <: Vector{<:Real} - if !all(y -> y == first(popsize), popsize) # SRS by definition is equi-weighted - error("Simple Random Sample must be equi-weighted. 
Different sampling weights detected in vectors") - end - weights = popsize ./ sampsize # ratio estimator for SRS - popsize = first(popsize) |> UInt + elseif typeof(popsize) <: Unsigned + weights = fill(popsize / sampsize, nrow(data)) # If popsize is given, weights vector is made concordant with popsize and sampsize, regardless of given weights argument else - error("either population size or frequency/probability weights must be specified") + error("something went wrong, please check validity of inputs.") + end + # If ignorefpc then set weights to 1 ?? + # TODO: This works under some cases, but should find better way to process ignoring fpc + if ignorefpc + @warn "assuming all weights are equal to 1.0" + weights = ones(nrow(data)) + probs = 1 ./ weights end + # sum of weights must equal to `popsize` for SRS + if !isnothing(weights) && !(isapprox(sum(weights), popsize; atol=1e-4)) + if ignorefpc && !(isapprox(sum(weights), sampsize; atol=1e-4)) # Change if ignorefpc functionality changes + error("sum of sampling weights should be equal to `sampsize` for `SimpleRandomSample` with `ignorefpc`") + elseif !ignorefpc + @show sum(weights) + error("sum of sampling weights must be equal to `popsize` for `SimpleRandomSample`") + end + end + # sum of probs must equal popsize for SRS + if !isnothing(probs) && !(isapprox(sum(1 ./ probs), popsize; atol=1e-4)) + if ignorefpc && !(isapprox(sum(1 ./ probs), sampsize; atol=1e-4)) # Change if ignorefpc functionality changes + error("sum of inverse sampling probabilities should be equal to `sampsize` for `SimpleRandomSample` with `ignorefpc`") + elseif !ignorefpc + @show sum(1 ./ probs) + error("Sum of inverse of sampling probabilities must be equal to `popsize` for Simple Random Sample") + end + end + ## Set remaining parts of data structure # set sampling fraction sampfraction = sampsize / popsize # set fpc @@ -80,7 +169,7 @@ struct SimpleRandomSample <: AbstractSurveyDesign probs = 1 ./ data[!, :weights] end data[!, :probs] = probs 
- + # Initialise the structure new(data, sampsize, popsize, sampfraction, fpc, ignorefpc) end end @@ -105,6 +194,9 @@ struct StratifiedSample <: AbstractSurveyDesign probs=nothing, ignorefpc=false ) + if isa(popsize, Symbol) + popsize = data[!, popsize] + end if isa(weights, Symbol) weights = data[!, weights] end @@ -141,7 +233,7 @@ struct StratifiedSample <: AbstractSurveyDesign # set sampling fraction sampfraction = sampsize ./ popsize # set fpc - fpc = ignorefpc ? fill(1,size(data, 1)) : 1 .- (sampsize ./ popsize) + fpc = ignorefpc ? fill(1, size(data, 1)) : 1 .- (sampsize ./ popsize) # add columns for frequency and probability weights to `data` if !isnothing(probs) data[!, :probs] = probs @@ -282,8 +374,8 @@ struct SurveyDesign <: AbstractSurveyDesign if ignorefpc # && (isnothing(popsize) || isnothing(weights) || isnothing(probs)) @warn "Assuming equal weights" weights = ones(nrow(data)) - end - + end + # TODO: Do the other case where clusters are given if isnothing(clusters) # set population size if it is not given; `weights` and `sampsize` must be given @@ -327,7 +419,7 @@ struct SurveyDesign <: AbstractSurveyDesign end # @show clusters, strata, sampsize,popsize, sampfraction, fpc, ignorefpc new(data, clusters, strata, sampsize, popsize, sampfraction, fpc, ignorefpc) - elseif isa(clusters,Symbol) + elseif isa(clusters, Symbol) # One Cluster sampling - PSU chosen with SRS, print("One stage cluster design with PSU SRS") elseif typeof(clusters) <: Vector{Symbol} diff --git a/src/svymean.jl b/src/svymean.jl index c8e3cc49..93b742bc 100644 --- a/src/svymean.jl +++ b/src/svymean.jl @@ -54,7 +54,7 @@ function svymean(x::Symbol, design::SimpleRandomSample) p.se = sqrt.(p.var) return p end - return DataFrame(mean = mean(design.data[!, x]), sem = sem(x, design::SimpleRandomSample)) + return DataFrame(mean=mean(design.data[!, x]), sem=sem(x, design::SimpleRandomSample)) end function svymean(x::Vector{Symbol}, design::SimpleRandomSample) @@ -84,23 +84,19 @@ function 
sem_svyby(x::AbstractVector, design::SimpleRandomSample) return sqrt(variance) end -function svymean(x::AbstractVector, design::SimpleRandomSample, weights) - return DataFrame(mean = mean(x), sem = sem_svyby(x, design)) -end - """ Inner method for `svyby` for SimpleRandomSample """ function svymean(x::AbstractVector, design::SimpleRandomSample, weights) - return DataFrame(mean = mean(x), sem = sem_svyby(x, design)) + return DataFrame(mean=mean(x), sem=sem_svyby(x, design)) end """ Inner method for `svyby` for StratifiedSample Calculates domain mean and its std error, based example 10.3.3 on pg394 Sarndal (1992) """ -function svymean(x::AbstractVector, popsize::AbstractVector,sampsize::AbstractVector,sampfraction::AbstractVector,strata::AbstractVector) - df = DataFrame(x = x, popsize = popsize, sampsize = sampsize, sampfraction = sampfraction,strata = strata) +function svymean(x::AbstractVector, popsize::AbstractVector, sampsize::AbstractVector, sampfraction::AbstractVector, strata::AbstractVector) + df = DataFrame(x=x, popsize=popsize, sampsize=sampsize, sampfraction=sampfraction, strata=strata) nsdh = [] substrata_domain_totals = [] Nh = [] @@ -108,28 +104,28 @@ function svymean(x::AbstractVector, popsize::AbstractVector,sampsize::AbstractVe fh = [] ȳsdh = [] sigma_ȳsh_squares = [] - grouped_frame = groupby(df,:strata) + grouped_frame = groupby(df, :strata) for each_strata in keys(grouped_frame) nsh = nrow(grouped_frame[each_strata])#, nrow=>:nsdh).nsdh - push!(nsdh,nsh) + push!(nsdh, nsh) substrata_domain_total = sum(grouped_frame[each_strata].x) ȳdh = mean(grouped_frame[each_strata].x) push!(ȳsdh, ȳdh) - push!(substrata_domain_totals,substrata_domain_total) + push!(substrata_domain_totals, substrata_domain_total) popsizes = first(grouped_frame[each_strata].popsize) - push!(Nh,popsizes) + push!(Nh, popsizes) sampsizes = first(grouped_frame[each_strata].sampsize) - push!(nh,sampsizes) + push!(nh, sampsizes) sampfrac = first(grouped_frame[each_strata].sampfraction) 
- push!(fh,sampfrac) - push!(sigma_ȳsh_squares,sum((grouped_frame[each_strata].x .- ȳdh).^2) ) + push!(fh, sampfrac) + push!(sigma_ȳsh_squares, sum((grouped_frame[each_strata].x .- ȳdh) .^ 2)) end - domain_mean = sum(Nh .* substrata_domain_totals ./ nh)/sum(Nh .* nsdh ./ nh) - pdh = nsdh./nh + domain_mean = sum(Nh .* substrata_domain_totals ./ nh) / sum(Nh .* nsdh ./ nh) + pdh = nsdh ./ nh N̂d = sum(Nh .* pdh) - domain_var = sum(Nh.^2 .* (1 .- fh) .* (sigma_ȳsh_squares .+ (nsdh .* (1 .-pdh) .* (ȳsdh .- domain_mean).^2)) ./ (nh .* (nh .- 1) )) ./ N̂d.^2 + domain_var = sum(Nh .^ 2 .* (1 .- fh) .* (sigma_ȳsh_squares .+ (nsdh .* (1 .- pdh) .* (ȳsdh .- domain_mean) .^ 2)) ./ (nh .* (nh .- 1))) ./ N̂d .^ 2 domain_mean_se = sqrt(domain_var) - return DataFrame(domain_mean = domain_mean, domain_mean_se = domain_mean_se) + return DataFrame(domain_mean=domain_mean, domain_mean_se=domain_mean_se) end """ @@ -162,12 +158,12 @@ function svymean(x::Symbol, design::StratifiedSample) s²ₕ = combine(gdf, x => var => :s²h).s²h V̂Ȳ̂ = sum((Wₕ .^ 2) .* (1 .- fₕ) .* s²ₕ ./ nₕ) SE = sqrt(V̂Ȳ̂) - return DataFrame(Ȳ̂ = Ȳ̂, SE = SE) + return DataFrame(Ȳ̂=Ȳ̂, SE=SE) end function svymean(::Bool; x::Symbol, design::StratifiedSample) gdf = groupby(design.data, design.strata) ȳₕ = combine(gdf, x => mean => :mean).mean s²ₕ = combine(gdf, x => var => :s²h).s²h - return DataFrame(ȳₕ,s²ₕ) + return DataFrame(ȳₕ, s²ₕ) end \ No newline at end of file diff --git a/src/svytotal.jl b/src/svytotal.jl index 90b6d2c1..c46c851f 100644 --- a/src/svytotal.jl +++ b/src/svytotal.jl @@ -43,7 +43,7 @@ function svytotal(x::Symbol, design::SimpleRandomSample) return p end total = design.popsize * mean(design.data[!, x]) - return DataFrame(total = total, se_total = se_tot(x, design::SimpleRandomSample)) + return DataFrame(total=total, se_total=se_tot(x, design::SimpleRandomSample)) end # Inner methods for `svyby` @@ -57,7 +57,7 @@ function se_total_svyby(x::AbstractVector, design::SimpleRandomSample, _) end function 
svytotal(x::AbstractVector, design::SimpleRandomSample, weights) total = wsum(x, weights) - return DataFrame(total = total, sem = se_total_svyby(x, design::SimpleRandomSample, weights)) + return DataFrame(total=total, sem=se_total_svyby(x, design::SimpleRandomSample, weights)) end # StratifiedSample @@ -82,7 +82,7 @@ function svytotal(x::Symbol, design::StratifiedSample) # the only difference between total and mean variance is the Nₕ instead of Wₕ V̂Ȳ̂ = sum((Nₕ .^ 2) .* (1 .- fₕ) .* s²ₕ ./ nₕ) SE = sqrt(V̂Ȳ̂) - return DataFrame(grand_total = grand_total, SE = SE) + return DataFrame(grand_total=grand_total, SE=SE) end function svytotal(x::Vector{Symbol}, design::SimpleRandomSample) diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl index ce2258f0..bf82b5bb 100644 --- a/test/SurveyDesign.jl +++ b/test/SurveyDesign.jl @@ -1,97 +1,155 @@ -@testset "SurveyDesign.jl" begin +# Work on copies, keep original +@testset "SimpleRandomSample" begin ##### SimpleRandomSample tests # Load API datasets - apisrs_original = load_data("apisrs") - apistrat_original = load_data("apistrat") - # apiclus1_original = load_data("apiclus1") - # apiclus2_original = load_data("apiclus2") - # Work on copy, keep original - apisrs1 = copy(apisrs_original) - # apiclus1 = copy(apiclus1_original) - # apiclus2 = copy(apiclus2_original) - - srs = SimpleRandomSample(apisrs1, popsize = :fpc) - @test srs.data.weights == 1 ./ srs.data.probs # weights should be inverse of probs - @test srs.sampsize > 0 - - apisrs2 = copy(apisrs_original) - srs_freq = SimpleRandomSample(apisrs2; weights = :pw ) + apisrs_original = load_data("apisrs") + apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw + apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1)) + ############################## + ### Valid type checking tests + apisrs = copy(apisrs_original) + @test_throws ErrorException SimpleRandomSample(apisrs, popsize=-2.83, ignorefpc=true) + @test_throws ErrorException 
SimpleRandomSample(apisrs, sampsize=-300) + @test_throws ErrorException SimpleRandomSample(apisrs, sampsize=-2.8, ignorefpc=true) + @test_throws ErrorException SimpleRandomSample(apisrs, weights=50) + @test_throws ErrorException SimpleRandomSample(apisrs, probs=1) + ############################## + ### weights or probs as Symbol + apisrs = copy(apisrs_original) + srs_weights = SimpleRandomSample(apisrs; weights=:pw) + @test srs_weights.data.weights[1] ≈ 30.97 atol = 1e-4 + @test srs_weights.data.weights == 1 ./ srs_weights.data.probs + ### probs as Symbol + apisrs = copy(apisrs_original) + srs_probs_sym = SimpleRandomSample(apisrs; probs=:derived_probs) + @test srs_probs_sym.data.probs[1] ≈ 0.032289 atol = 1e-4 + @test srs_probs_sym.data.probs == 1 ./ srs_probs_sym.data.weights + ############################## + ### Weights or probs as non-numeric error + apisrs = copy(apisrs_original) + @test_throws ErrorException SimpleRandomSample(apisrs, weights=:stype) + @test_throws ErrorException SimpleRandomSample(apisrs, probs=:cname) + ############################## + ### popsize given as Symbol + apisrs = copy(apisrs_original) + srs_popsize_sym = SimpleRandomSample(apisrs; popsize=:fpc) + @test srs_popsize_sym.data.weights == 1 ./ srs_popsize_sym.data.probs # weights should be inverse of probs + @test srs_popsize_sym.sampsize > 0 + ### popsize given as Vector + apisrs = copy(apisrs_original) + srs_popsize_vec = SimpleRandomSample(apisrs; popsize=apisrs.fpc) + @test srs_popsize_vec.data.weights == 1 ./ srs_popsize_vec.data.probs # weights should be inverse of probs + @test srs_popsize_vec.sampsize > 0 + ############################## + ### sampsize given as Symbol + apisrs = copy(apisrs_original) + srs_sampsize_sym = SimpleRandomSample(apisrs; sampsize=:derived_sampsize, weights=:pw) + @test srs_sampsize_sym.data.weights == 1 ./ srs_sampsize_sym.data.probs # weights should be inverse of probs + @test srs_sampsize_sym.sampsize > 0 + ### sampsize given as Vector + apisrs = 
copy(apisrs_original) + srs_sampsize_vec = SimpleRandomSample(apisrs; sampsize=apisrs.derived_sampsize, probs=:derived_probs) + @test srs_sampsize_vec.data.weights == 1 ./ srs_sampsize_vec.data.probs # weights should be inverse of probs + @test srs_sampsize_vec.sampsize > 0 + ############################## + ### both weights and probs given + # If weights given, probs is superfluous + apisrs = copy(apisrs_original) + srs_weights_probs = SimpleRandomSample(apisrs; weights=:pw, probs=:derived_probs) + srs_weights_probs = SimpleRandomSample(apisrs; weights=:pw, probs=:pw) + ############################## + ### sum of weights and probs condition check + apisrs = copy(apisrs_original) + @test_throws ErrorException SimpleRandomSample(apisrs, probs=fill(0.3, size(apisrs_original, 1))) + apisrs = copy(apisrs_original) + @test_throws ErrorException SimpleRandomSample(apisrs, popsize=:fpc, probs=fill(0.3, size(apisrs_original, 1))) + ############################## + ### weights only as Vector + apisrs = copy(apisrs_original) + srs_weights = SimpleRandomSample(apisrs; weights=apisrs.pw) + @test srs_weights.data.weights[1] == 30.97 + @test srs_weights.data.weights == 1 ./ srs_weights.data.probs + ### probs only as Vector + apisrs = copy(apisrs_original) + srs_freq = SimpleRandomSample(apisrs; probs=apisrs.derived_probs) @test srs_freq.data.weights[1] == 30.97 @test srs_freq.data.weights == 1 ./ srs_freq.data.probs - - apisrs3 = copy(apisrs_original) - srs_weights = SimpleRandomSample(apisrs3, ignorefpc = false, weights = :fpc) - - @test_throws SimpleRandomSample(apisrs3, ignorefpc = false, weights = :stype) - apisrs4 = copy(apisrs_original) - srs_w_p = SimpleRandomSample(apisrs4, ignorefpc = false, weights = :fpc, probs = fill(0.3, size(apisrs_original, 1))) + ############################## + ### ignorefpc tests. 
TODO: change if ignorefpc functionality changed + apisrs = copy(apisrs_original) + srs_ignorefpc = SimpleRandomSample(apisrs; popsize=:fpc, ignorefpc=true) + @test srs_ignorefpc.data.weights == 1 ./ srs_ignorefpc.data.probs # weights should be inverse of probs + @test srs_ignorefpc.sampsize > 0 + ### incorrect probs with correct popsize, ignorefpc = true + apisrs = copy(apisrs_original) + srs_w_p = SimpleRandomSample(apisrs, popsize=:fpc, probs=fill(0.3, size(apisrs_original, 1)), ignorefpc=true) @test srs_w_p.data.probs == 1 ./ srs_w_p.data.weights - @test sum(srs_w_p.data.probs) == 1 - - apisrs5 = copy(apisrs_original) - srs = SimpleRandomSample(apisrs5, ignorefpc = true, probs = 1 ./ apisrs5.pw ) + ### ingorefpc = true with probs given + apisrs = copy(apisrs_original) + srs = SimpleRandomSample(apisrs, ignorefpc=true, probs=:derived_probs) @test srs.data.probs == 1 ./ srs.data.weights - apisrs6 = copy(apisrs_original) - @test_throws SimpleRandomSample(apisrs6, popsize = -2.8, ignorefpc = true)# the errror is wrong - @test_throws SimpleRandomSample(apisrs6, sampsize = -2.8, ignorefpc = true)# the function is working upto line 55 + ############################## + ### probs as vector declared on-the-fly + apisrs = copy(apisrs_original) + srs_prob = SimpleRandomSample(apisrs; probs=1 ./ apisrs.pw) + @test srs_prob.data.weights[1] == 30.97 + @test srs_prob.data.weights == 1 ./ srs_prob.data.probs +end +@testset "StratifiedSample" begin + ### StratifiedSample tests + # Load API datasets + apistrat_original = load_data("apistrat") + apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw + ############################## + apistrat = copy(apistrat_original) + strat_pop = StratifiedSample(apistrat, :stype; popsize=:fpc) + @test strat_pop.data.probs == 1 ./ strat_pop.data.weights - ##### TODO: needs change; this works but isn't what the user is expecting - # srs_prob = SimpleRandomSample(apisrs; probs = 1 ./ apisrs.pw) - # @test srs_prob.data.probs[1] == 0.3 + 
apistrat = copy(apistrat_original) + strat_wt = StratifiedSample(apistrat, :stype; weights=:pw) + @test strat_wt.data.probs == 1 ./ strat_wt.data.weights - #### TODO: StratifiedSample tests - # ... @sayantika @iulia @shikhar - # apistrat examples from R, check the main if-else cases + apistrat3 = copy(apistrat_original) + strat_probs = StratifiedSample(apistrat3, :stype; probs=1 ./ apistrat3.pw) + @test strat_probs.data.probs == 1 ./ strat_probs.data.weights - # Test with probs = , weight = , and popsize = arguments, as vectors and sybols + #see github issue for srs + # apistrat4 = copy(apistrat_original) + # strat_probs1 = StratifiedSample(apistrat4, :stype; probs=fill(0.3, size(apistrat4, 1))) + #@test strat_probs1.data.probs == 1 ./ strat_probs1.data.weights -end + apistrat5 = copy(apistrat_original) + strat_popsize = StratifiedSample(apistrat5, :stype; popsize=apistrat5.fpc) + @test strat_popsize.data.probs == 1 ./ strat_popsize.data.weights -@testset "StratifiedSample" begin - # StratifiedSample tests - apistrat_original = load_data("apistrat") - apistrat1 = copy(apistrat_original) - strat = StratifiedSample(apistrat1, :stype ; popsize = :fpc ) - @test strat.data.probs == 1 ./ strat.data.weights - - apistrat2 = copy(apistrat_original) - strat_wt = StratifiedSample(apistrat2, :stype ; weights = :pw) - @test strat_wt.data.probs == 1 ./ strat_wt.data.weights - - apistrat3 = copy(apistrat_original) - strat_probs = StratifiedSample(apistrat3, :stype ; probs = 1 ./ apistrat3.pw) - @test strat_probs.data.probs == 1 ./ strat_probs.data.weights - - #see github issue for srs - apistrat4 = copy(apistrat_original) - strat_probs1 = StratifiedSample(apistrat4, :stype; probs = fill(0.3, size(apistrat4, 1))) - #@test strat_probs1.data.probs == 1 ./ strat_probs1.data.weights - - apistrat5 = copy(apistrat_original) - strat_popsize = StratifiedSample(apistrat5, :stype; popsize= apistrat5.fpc) - @test strat_popsize.data.probs == 1 ./ strat_popsize.data.weights - - # To edit - # 
strat_popsize_fpc = StratifiedSample(apistrat, :stype; popsize= apistrat.fpc, ignorefpc = true) - # strat_new = StratifiedSample(apistrat, :stype; popsize= apistrat.pw, sampsize = apistrat.fpc) #should throw error because sampsize > popsize + # To edit + # strat_popsize_fpc = StratifiedSample(apistrat, :stype; popsize= apistrat.fpc, ignorefpc = true) + # strat_new = StratifiedSample(apistrat, :stype; popsize= apistrat.pw, sampsize = apistrat.fpc) #should throw error because sampsize > popsize end ##### SurveyDesign tests @testset "SurveyDesign" begin + # Load API datasets + apisrs_original = load_data("apisrs") + apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw + apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1)) + ############################## # Case 1: Simple Random Sample - svydesign1 = SurveyDesign(apisrs, popsize = apisrs.fpc) + apisrs = copy(apisrs_original) + svydesign1 = SurveyDesign(apisrs, popsize=apisrs.fpc) @test svydesign1.data.weights == 1 ./ svydesign1.data.probs # weights should be inverse of probs @test svydesign1.sampsize > 0 # Case 1b: SRS 'with replacement' approximation ie ignorefpc = true - svydesign2 = SurveyDesign(apisrs, popsize = apisrs.fpc, ignorefpc = true) + apisrs = copy(apisrs_original) + svydesign2 = SurveyDesign(apisrs, popsize=apisrs.fpc, ignorefpc=true) @test svydesign2.data.weights == 1 ./ svydesign2.data.probs # weights should be inverse of probs @test svydesign2.sampsize > 0 # Case 2: Stratified Random Sample # strat_design = SurveyDesign(apistrat,strata = :stype, popsize =:fpc, ignorefpc = false) - + # Case: Arbitrary weights # Case: One-stage cluster sampling, no strata @@ -99,6 +157,6 @@ end # Case: One-stage cluster sampling, with one-stage strata # Case: Two cluster, two strata - + # Case: Multi stage stratified design end diff --git a/test/dimnames.jl b/test/dimnames.jl index 1b5e41ac..32501f5d 100644 --- a/test/dimnames.jl +++ b/test/dimnames.jl @@ -3,7 +3,7 @@ apisrs = 
load_data("apisrs") # make a copy to not modify the original dataset apisrs_copy = copy(apisrs) - srs_new = SimpleRandomSample(apisrs_copy,ignorefpc = true) + srs_new = SimpleRandomSample(apisrs_copy,popsize=:fpc,ignorefpc = true) # make a new copy to use for the old design apisrs_copy = copy(apisrs) srs_old = svydesign(id = :1, data = apisrs) diff --git a/test/svyboxplot.jl b/test/svyboxplot.jl index 9c127805..d43fb1ea 100644 --- a/test/svyboxplot.jl +++ b/test/svyboxplot.jl @@ -1,7 +1,7 @@ @testset "svyboxplot.jl" begin # SimpleRandomSample apisrs = load_data("apisrs") - srs = SimpleRandomSample(apisrs, popsize = apisrs.fpc) + srs = SimpleRandomSample(apisrs,popsize = apisrs.fpc) bp = svyboxplot(srs, :stype, :enroll; weights = :pw) @test bp.grid[1].entries[1].positional[2] == srs.data[!, :enroll] diff --git a/test/svyhist.jl b/test/svyhist.jl index 56846d40..a04f747a 100644 --- a/test/svyhist.jl +++ b/test/svyhist.jl @@ -4,7 +4,7 @@ # SimpleRandomSample apisrs = load_data("apisrs") - srs = SimpleRandomSample(apisrs,ignorefpc = true) + srs = SimpleRandomSample(apisrs,popsize=:fpc) h = svyhist(srs, :enroll) @test h.grid[1].entries[1].positional[2] |> length == 21 diff --git a/test/svymean.jl b/test/svymean.jl index ee9396b7..c0a8b43d 100644 --- a/test/svymean.jl +++ b/test/svymean.jl @@ -1,16 +1,16 @@ @testset "svymean.jl" begin # SimpleRandomSample apisrs_original = load_data("apisrs") - apisrs1 = copy(apisrs_original) + apisrs = copy(apisrs_original) - srs = SimpleRandomSample(apisrs1, popsize = apisrs.fpc) + srs = SimpleRandomSample(apisrs, popsize = apisrs.fpc) @test svymean(:api00, srs).mean[1] == 656.585 @test svymean(:api00, srs).sem[1] ≈ 9.249722039282807 @test svymean(:enroll, srs).mean[1] ≈ 584.61 @test svymean(:enroll, srs).sem[1] ≈ 27.36836524766856 - apisrs2 = copy(apisrs_original) - srs = SimpleRandomSample(apisrs2, ignorefpc = true) + apisrs = copy(apisrs_original) + srs = SimpleRandomSample(apisrs, popsize=apisrs.fpc,ignorefpc = true) @test 
svymean(:api00, srs).mean[1] == 656.585 @test svymean(:api00, srs).sem[1] ≈ 9.402772170880636 diff --git a/test/svyplot.jl b/test/svyplot.jl index 90889a1f..a21ed560 100644 --- a/test/svyplot.jl +++ b/test/svyplot.jl @@ -1,12 +1,10 @@ @testset "svyplot.jl" begin # SimpleRandomSample apisrs = load_data("apisrs") - srs = SimpleRandomSample(apisrs,ignorefpc = true) + srs = SimpleRandomSample(apisrs,popsize=:fpc) s = svyplot(srs, :api99, :api00) - @test s.grid[1].entries[1].named[:markersize] == srs.data.weights @test s.grid[1].entries[1].positional[1] == srs.data.api99 @test s.grid[1].entries[1].positional[2] == srs.data.api00 - # StratifiedSample end diff --git a/test/svyquantile.jl b/test/svyquantile.jl index e88c338d..e2e6a77e 100644 --- a/test/svyquantile.jl +++ b/test/svyquantile.jl @@ -2,7 +2,7 @@ # SimpleRandomSample apisrs = load_data("apisrs") - srs_new = SimpleRandomSample(apisrs, ignorefpc = true) + srs_new = SimpleRandomSample(apisrs,popsize=:fpc,ignorefpc = true) srs_old = svydesign(id = :1, data = apisrs) # 0.5th percentile q_05_new = svyquantile(:api00, srs_new, 0.5) diff --git a/test/svytotal.jl b/test/svytotal.jl index f664508d..026b7e2a 100644 --- a/test/svytotal.jl +++ b/test/svytotal.jl @@ -1,18 +1,17 @@ @testset "svytotal.jl" begin # SimpleRandomSample apisrs = load_data("apisrs") - - # without fpc - srs = SimpleRandomSample(apisrs, ignorefpc = true) - tot = svytotal(:api00, srs) - @test tot.total[1] == 131317.0 - @test tot.se_total[1] ≈ 1880.5544341761279 - # with fpc srs = SimpleRandomSample(apisrs, popsize = apisrs.fpc) tot = svytotal(:api00, srs) @test tot.total[1] ≈ 4.06688749e6 @test tot.se_total[1] ≈ 57292.7783113177 + # TODO: ignorefpc tests don't actually work?? + # # without fpc + # srs = SimpleRandomSample(apisrs, popsize = :fpc, ignorefpc = true) + # tot = svytotal(:api00, srs) + # @test tot.total[1] == 131317.0 + # @test tot.se_total[1] ≈ 1880.5544341761279 # StratifiedSample end