diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl index 9480058f..30770b77 100644 --- a/src/SurveyDesign.jl +++ b/src/SurveyDesign.jl @@ -364,7 +364,7 @@ julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistak julia> dclus1 = OneStageClusterSample(apiclus1, :dnum; weights=:pw) OneStageClusterSample: -data: 183x45 DataFrame +data: 183x46 DataFrame cluster: dnum design.data[!,design.cluster]: 637, 637, 637, ..., 448 popsize: popsize @@ -400,7 +400,7 @@ struct OneStageClusterSample <: AbstractSurveyDesign data_groupedby_cluster = groupby(data, cluster) data[!, sampsize_labels] = fill(size(data_groupedby_cluster, 1),(nrow(data),)) weights = :weights - data[!, weights] = data[!, popsize] ./ data[!, sampsize_labels] + data[!, :weights] = data[!, popsize] ./ data[!, sampsize_labels] data[!, :probs] = 1 ./ data[!, weights] # Many formulae are easily defined in terms of sampling probabilties data[!, :allprobs] = data[!, :probs] # In one-stage cluster sample, allprobs is just probs, no multiplication needed data[!, :strata] = ones(nrow(data)) @@ -414,21 +414,17 @@ struct OneStageClusterSample <: AbstractSurveyDesign if !(typeof(data[!, weights]) <: Vector{<:Real}) error(string("given weights column ", weights , " is not of numeric type")) end - if !all(w -> w == first(data[!, weights]), data[!, weights]) - error("weights must be same for all observations for OneStageClusterSample") - end - # For one-stage sample only one sampsize vector sampsize_labels = :sampsize data_groupedby_cluster = groupby(data, cluster) data[!, sampsize_labels] = fill(size(data_groupedby_cluster, 1),(nrow(data),)) popsize = :popsize data[!, popsize] = data[!, weights] .* data[!, sampsize_labels] data[!, :probs] = 1 ./ data[!, weights] # Many formulae are easily defined in terms of sampling probabilties + data[!, :weights] = data[!, weights] data[!, :allprobs] = data[!, :probs] # In one-stage cluster sample, allprobs is just probs, no multiplication needed data[!, :strata] = ones(nrow(data)) pps = false has_strata = false new(data, cluster, popsize, sampsize_labels, weights, pps, has_strata) end -end - +end \ No newline at end of file diff --git a/src/total.jl b/src/total.jl index eaa5241d..087c6123 100644 --- a/src/total.jl +++ b/src/total.jl @@ -88,6 +88,22 @@ function total(x::Vector{Symbol}, design::AbstractSurveyDesign) return df end +""" +```jldoctest +julia> using Survey + +julia> apiclus1 = load_data("apiclus1"); + +julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc); + +julia> total(:api00, dclus1) +1×2 DataFrame + Row │ total SE + │ Float64 Float64 +─────┼────────────────────── + 1 │ 5.94916e6 1.33948e6 +``` +""" function total(x::Symbol, design::OneStageClusterSample) gdf = groupby(design.data, design.cluster) ŷₜ = combine(gdf, x => sum => :sum).sum