Skip to content

Commit

Permalink
Merge pull request #48 from smishr/design_update
Browse files Browse the repository at this point in the history
Add fpc, fix svytotal and svymean
  • Loading branch information
ayushpatnaikgit authored Aug 31, 2022
2 parents 841bf45 + ac38000 commit 3ba241a
Show file tree
Hide file tree
Showing 16 changed files with 146 additions and 249 deletions.
8 changes: 1 addition & 7 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
name: Documentation

on:
push:
branches:
- main
pull_request:
branches:
- main
on: [push, pull_request]
# can add tags if needed
jobs:
build:
Expand Down
123 changes: 81 additions & 42 deletions src/SurveyDesign.jl
Original file line number Diff line number Diff line change
@@ -1,62 +1,96 @@
"""
    print_short([io::IO = stdout,] x)

Print `x` in a compact form, used by the `show` methods of the survey designs.

Floating point values are rounded to 3 significant digits. A single value or a
collection with at most 3 elements is printed as is; a longer collection is compressed
to its first three elements followed by `...` and its last element.

# Arguments
- `io`: stream to print to; defaults to `stdout`.
- `x`: a single value or an indexable collection.
"""
function print_short(io::IO, x)
    # an empty collection has no first element to inspect, so print it unchanged
    if isempty(x)
        print(io, x)
        return nothing
    end
    # write floats in short form
    if first(x) isa AbstractFloat
        x = round.(x, sigdigits = 3)
    end
    # Print short vectors or single values as they are, compress otherwise.
    # The threshold includes length 3: compressing a 3-element vector would print its
    # last element twice ("a, b, c ... c") and suggest hidden elements.
    if length(x) <= 3
        print(io, x)
    else
        print(io, x[1], ", ", x[2], ", ", x[3], " ... ", last(x))
    end
    return nothing
end

# one-argument form kept for existing callers; prints to `stdout`
print_short(x) = print_short(stdout, x)

"""
    AbstractSurveyDesign

Supertype for every survey design type: `SimpleRandomSample`, `ClusterSample`
and `StratifiedSample`.

The data frame passed to a survey design constructor is modified in place. To avoid
this, pass a copy of the data instead of the original.
"""
abstract type AbstractSurveyDesign end

"""
    SimpleRandomSample <: AbstractSurveyDesign

Survey design sampled by simple random sampling.

Unless `popsize` is explicitly provided, the population size is set to the rounded sum
of the weights, which equals the number of rows of `data` when unit weights are used.
"""
struct SimpleRandomSample <: AbstractSurveyDesign
    data::DataFrame
    sampsize::UInt
    popsize::Union{UInt,Nothing}
    sampfraction::Real
    fpc::Real
    ignorefpc::Bool
    # Inner constructor.
    #
    # NOTE: `data` is modified in place: the columns `:weights` and `:probs` are added
    # (or overwritten). Pass a copy to keep the original data frame untouched.
    #
    # Keywords:
    # - `popsize`:   population size; when `nothing` it is set to the rounded sum of the
    #                weights.
    # - `sampsize`:  sample size; defaults to the number of rows of `data`.
    # - `weights`:   frequency weights, given as a vector or as the name (`Symbol`) of a
    #                column of `data`. When `nothing`, the weights are `1 ./ probs` if
    #                `probs` is given and unit weights otherwise.
    # - `probs`:     sampling probabilities, given as a vector or as the name (`Symbol`)
    #                of a column of `data`. Only used when `weights` is `nothing`.
    # - `ignorefpc`: when `true` the finite population correction is fixed to 1,
    #                otherwise it is `1 - sampsize / popsize`.
    function SimpleRandomSample(data::DataFrame;
                                popsize = nothing,
                                sampsize = nrow(data),
                                weights = nothing,
                                probs = nothing,
                                ignorefpc = true
                                )
        # `weights` and `probs` may be given as the name of a column of `data`
        if isa(weights, Symbol)
            weights = data[!, weights]
        end
        if isa(probs, Symbol)
            probs = data[!, probs]
        end
        # `probs` used to be accepted and then silently discarded; derive the weights
        # from it when no weights were supplied, fall back to unit weights otherwise
        if isnothing(weights)
            weights = isnothing(probs) ? ones(nrow(data)) : 1 ./ probs
        end
        # set population size if it is not given
        if isnothing(popsize)
            popsize = round(sum(weights)) |> UInt
        end
        # add frequency weights column to `data`
        data[!, :weights] = weights
        # add probability weights column to `data`
        data[!, :probs] = 1 ./ data[!, :weights]
        # set sampling fraction
        sampfraction = sampsize / popsize
        # set fpc
        fpc = ignorefpc ? 1 : 1 - sampfraction

        new(data, sampsize, popsize, sampfraction, fpc, ignorefpc)
    end
end

# Display a `SimpleRandomSample` in the REPL: a bold label for every entry, followed by
# the dimensions of the data, shortened weights and probabilities, and the design values.
# NOTE(review): everything is written to stdout rather than to `io`, so `sprint`/`repr`
# will not capture it; `print_short(x)` writes to stdout as well, so all calls have to
# be switched together.
function Base.show(io::IO, ::MIME"text/plain", design::SimpleRandomSample)
    label(text) = printstyled(text; bold = true)
    nrows, ncols = size(design.data)

    label("Simple Random Sample:\n")
    label("data: ")
    print(nrows, "x", ncols, " DataFrame")

    # per-observation columns are shortened before printing
    for (text, column) in (("\nweights: ", design.data.weights),
                           ("\nprobs: ", design.data.probs))
        label(text)
        print_short(column)
    end

    label("\nfpc: ")
    print_short(design.fpc)

    # design-level sizes are printed verbatim
    for (text, value) in (("\n popsize: ", design.popsize),
                          ("\n sampsize: ", design.sampsize))
        label(text)
        print(value)
    end
    return nothing
end

"""
    StratifiedSample <: AbstractSurveyDesign

Survey design sampled by stratification.
"""
struct StratifiedSample <: AbstractSurveyDesign
data::DataFrame
sampsize::UInt
popsize::Union{UInt,Nothing}
sampfraction::Real
fpc::Real
nofpc::Bool
function StratifiedSample(data::DataFrame, strata::AbstractVector; weights = ones(nrow(data)), probs = 1 ./ weights)
# add frequency weights, probability weights and sample size columns
data[!, :weights] = weights
Expand All @@ -72,40 +106,45 @@ end

# Display a `StratifiedSample`: a bold label for every entry, followed by the dimensions
# of the data, shortened weights, probabilities and strata, and the design values.
# NOTE(review): everything is written to stdout rather than to `io`, so `sprint`/`repr`
# will not capture it; `print_short(x)` writes to stdout as well, so all calls have to
# be switched together.
function Base.show(io::IO, design::StratifiedSample)
    label(text) = printstyled(text; bold = true)
    nrows, ncols = size(design.data)

    label("Stratified Sample:\n")
    label("data: ")
    print(nrows, "x", ncols, " DataFrame")

    # per-observation columns are shortened before printing
    for (text, column) in (("\nweights: ", design.data.weights),
                           ("\nprobs: ", design.data.probs),
                           ("\nstrata: ", design.data.strata))
        label(text)
        print_short(column)
    end

    label("\nfpc: ")
    print_short(design.fpc)

    # design-level sizes are printed verbatim
    for (text, value) in (("\n popsize: ", design.popsize),
                          ("\n sampsize: ", design.sampsize))
        label(text)
        print(value)
    end
    return nothing
end

"""
    ClusterSample <: AbstractSurveyDesign

Survey design sampled by clustering.
"""
struct ClusterSample <: AbstractSurveyDesign
# survey data; the only field this design stores
data::DataFrame
end

# Display a `ClusterSample`: the dimensions of the data, shortened weights and
# probabilities, and whichever design-level values the design provides.
# NOTE(review): everything is written to stdout rather than to `io`, so `sprint`/`repr`
# will not capture it; `print_short(x)` writes to stdout as well, so all calls have to
# be switched together.
function Base.show(io::IO, design::ClusterSample)
    printstyled("Cluster Sample:\n"; bold = true)
    printstyled("data: "; bold = true)
    print(size(design.data, 1), "x", size(design.data, 2), " DataFrame")
    printstyled("\nweights: "; bold = true)
    print_short(design.data.weights)
    printstyled("\nprobs: "; bold = true)
    print_short(design.data.probs)
    # `ClusterSample` stores only `data`, so reading `design.fpc`, `design.popsize` or
    # `design.sampsize` unconditionally throws and makes every cluster design
    # undisplayable. Print each value only when the design actually provides it; the
    # entries show up automatically once the corresponding fields are added.
    if hasproperty(design, :fpc)
        printstyled("\nfpc: "; bold = true)
        print_short(design.fpc)
    end
    for (text, field) in (("\n popsize: ", :popsize), ("\n sampsize: ", :sampsize))
        hasproperty(design, field) || continue
        printstyled(text; bold = true)
        print(getproperty(design, field))
    end
    return nothing
end
22 changes: 5 additions & 17 deletions src/dimnames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
Get the dimensions of a `SurveyDesign`.
```jldoctest
julia> using Survey
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
julia> dim(srs)
(200, 44)
(200, 42)
```
"""
dim(design::AbstractSurveyDesign) = size(design.data)
Expand All @@ -19,8 +17,6 @@ dim(design::AbstractSurveyDesign) = size(design.data)
Method for `svydesign` object.
```jldoctest
julia> using Survey
julia> apistrat = load_data("apistrat");
julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc);
Expand All @@ -36,14 +32,12 @@ dim(design::svydesign) = size(design.variables)
Get the column names of a `SurveyDesign`.
```jldoctest
julia> using Survey
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
julia> colnames(srs)
44-element Vector{String}:
42-element Vector{String}:
"Column1"
"cds"
"stype"
Expand All @@ -55,15 +49,15 @@ julia> colnames(srs)
"cname"
"cnum"
"avg.ed"
"full"
"emer"
"enroll"
"api.stu"
"pw"
"fpc"
"weights"
"probs"
"popsize"
"sampsize"
```
"""
colnames(design::AbstractSurveyDesign) = names(design.data)
Expand All @@ -72,8 +66,6 @@ colnames(design::AbstractSurveyDesign) = names(design.data)
Method for `svydesign` objects.
```jldoctest
julia> using Survey
julia> apistrat = load_data("apistrat");
julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc);
Expand Down Expand Up @@ -109,16 +101,14 @@ colnames(design::svydesign) = names(design.variables)
Get the names of the rows and columns of a `SurveyDesign`.
```jldoctest
julia> using Survey
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
julia> dimnames(srs)
2-element Vector{Vector{String}}:
["1", "2", "3", "4", "5", "6", "7", "8", "9", "10" … "191", "192", "193", "194", "195", "196", "197", "198", "199", "200"]
["Column1", "cds", "stype", "name", "sname", "snum", "dname", "dnum", "cname", "cnum" … "full", "emer", "enroll", "api.stu", "pw", "fpc", "weights", "probs", "popsize", "sampsize"]
["Column1", "cds", "stype", "name", "sname", "snum", "dname", "dnum", "cname", "cnum" … "grad.sch", "avg.ed", "full", "emer", "enroll", "api.stu", "pw", "fpc", "weights", "probs"]
```
"""
dimnames(design::AbstractSurveyDesign) = [string.(1:size(design.data, 1)), names(design.data)]
Expand All @@ -127,8 +117,6 @@ dimnames(design::AbstractSurveyDesign) = [string.(1:size(design.data, 1)), names
Method for `svydesign` objects.
```jldoctest
julia> using Survey
julia> apistrat = load_data("apistrat");
julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc);
Expand Down
2 changes: 0 additions & 2 deletions src/svyboxplot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ The keyword arguments are all the arguments that can be passed to `mapping` in
[AlgebraOfGraphics](https://docs.juliahub.com/AlgebraOfGraphics/CHIaw/0.4.7/).
```@example svyboxplot
julia> using survey
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
Expand Down
10 changes: 4 additions & 6 deletions src/svydesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,17 @@
The `svydesign` object combines a data frame and all the survey design information needed to analyse it.
```jldoctest
julia> using Survey;
julia> apiclus1 = load_data("apiclus1");
julia> dclus1 = svydesign(id= :dnum, weights= :pw, data = apiclus1, fpc= :fpc) |> print
Survey Design:
variables: 183x45 DataFrame
id: dnum
strata: 1, 1, 1 ... (length = 183)
probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... (length = 183)
strata: 1, 1, 1 ... 1
probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... 0.029544719150814778
fpc:
popsize: 757, 757, 757 ... (length = 183)
sampsize: 183, 183, 183 ... (length = 183)
popsize: 757, 757, 757 ... 757
sampsize: 183, 183, 183 ... 183
nest: false
check_strat: true
```
Expand Down
2 changes: 0 additions & 2 deletions src/svyglm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ svyglm(formula, design, dist, link)
The `svyglm` function can be used to fit glms on svydesign.
```jldoctest
julia> using Survey
julia> apiclus1 = load_data("apiclus1");
julia> dclus1 = svydesign(id=:dnum, weights=:pw, data = apiclus1);
Expand Down
2 changes: 0 additions & 2 deletions src/svyhist.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ The `weights` argument should be a `Symbol` specifying a design variable.
For the complete argument list see [Makie.hist](https://makie.juliaplots.org/stable/examples/plotting_functions/hist/).
```julia
julia> using Survey
julia> apisrs = load_data("apisrs");
julia> srs = SimpleRandomSample(apisrs);
Expand Down
Loading

0 comments on commit 3ba241a

Please sign in to comment.