Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fpc, fix svytotal and svymean #48

Merged
merged 16 commits into from
Aug 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
name: Documentation

on:
push:
branches:
- main
pull_request:
branches:
- main
on: [push, pull_request]
# can add tags if needed
jobs:
build:
Expand Down
123 changes: 81 additions & 42 deletions src/SurveyDesign.jl
Original file line number Diff line number Diff line change
@@ -1,62 +1,96 @@
# Helper function for nice printing
function print_short(x::AbstractVector)
function print_short(x)
# write floats in short form
if isa(x[1], Float64)
x = round.(x, sigdigits = 3)
end
# print short vectors or single values as they are, compress otherwise
if length(x) < 3
print(x)
else
print( x[1], ", ", x[2], ", ", x[3], " ...", " (length = ", length(x), ")")
print( x[1], ", ", x[2], ", ", x[3], " ... ", last(x))
end
end

"""
Supertype for every survey design type: `SimpleRandomSample`, `ClusterSample`
and `StratifiedSample`.

The data passed to a survey constructor is modified in place. To avoid this, pass a copy
of the data instead of the original.
"""
abstract type AbstractSurveyDesign end

"""
A `SimpleRandomSample` object contains survey design information needed to
analyse surveys sampled by simple random sampling.
TODO: documentation about user making a copy
TODO: add fpc
By default popsize is same sampsize, unless explicitly provided
SimpleRandomSample <: AbstractSurveyDesign

Survey design sampled by simple random sampling.

Unless `popsize` is explicitly provided, the population size is set to the rounded sum of `weights`; with the default unit weights this equals the sample size.
"""
struct SimpleRandomSample <: AbstractSurveyDesign
data::DataFrame
sample_size::Int
pop_size::Int
function SimpleRandomSample(data::DataFrame; sample_size = nrow(data), pop_size = nrow(data),
weights = ones(nrow(data)), probs = 1 ./ weights)
# add frequency weights, probability weights and sample size columns
# TODO: make lines 28 & 29 use a helper function?
sampsize::UInt
popsize::Union{UInt,Nothing}
sampfraction::Real
fpc::Real
ignorefpc::Bool
function SimpleRandomSample(data::DataFrame;
popsize = nothing,
sampsize = nrow(data),
weights = ones(nrow(data)), # Check the defaults
probs = nothing,
ignorefpc = true
)
if isa(weights, Symbol)
weights = data[!, weights]
end
# set population size if it is not given; `weights` and `sampsize` must be given
if isnothing(popsize)
popsize = round(sum(weights)) |> UInt
end
# add frequency weights column to `data`
data[!, :weights] = weights
data[!, :probs] = probs
# add probability weights column to `data`
data[!, :probs] = 1 ./ data[!, :weights]
# set sampling fraction
sampfraction = sampsize / popsize
# set fpc
fpc = ignorefpc ? 1 : 1 - (sampsize / popsize)

new(data, sample_size, pop_size)
new(data, sampsize, popsize, sampfraction, fpc, ignorefpc)
end
end

# `show` method for printing information about a `SimpleRandomSample` after construction
# TODO: change `show` to 3 argument method
function Base.show(io::IO, design::SimpleRandomSample)
printstyled("Simple Random Sample:\n")
function Base.show(io::IO, ::MIME"text/plain", design::SimpleRandomSample)
printstyled("Simple Random Sample:\n"; bold = true)
printstyled("data: "; bold = true)
print(size(design.data)[1], "x", size(design.data)[2], " DataFrame")
print(size(design.data, 1), "x", size(design.data, 2), " DataFrame")
printstyled("\nweights: "; bold = true)
print_short(design.data.weights)
printstyled("\nprobs: "; bold = true)
print_short(design.data.probs)
# TODO: change fpc
printstyled("\nfpc: "; bold = true)
print("\n popsize: ")
print(design.pop_size)
print("\n sampsize: ")
print(design.sample_size)
print_short(design.fpc)
printstyled("\n popsize: "; bold = true)
print(design.popsize)
printstyled("\n sampsize: "; bold = true)
print(design.sampsize)
end

"""
A `StratifiedSample` object holds information necessary for surveys sampled by
stratification.
StratifiedSample <: AbstractSurveyDesign

Survey design sampled by stratification.
"""
struct StratifiedSample <: AbstractSurveyDesign
data::DataFrame
sampsize::UInt
popsize::Union{UInt,Nothing}
sampfraction::Real
fpc::Real
nofpc::Bool
function StratifiedSample(data::DataFrame, strata::AbstractVector; weights = ones(nrow(data)), probs = 1 ./ weights)
# add frequency weights, probability weights and sample size columns
data[!, :weights] = weights
Expand All @@ -72,40 +106,45 @@ end

# `show` method for printing information about a `StratifiedSample` after construction
function Base.show(io::IO, design::StratifiedSample)
printstyled("Stratified Sample:\n")
printstyled("Stratified Sample:\n"; bold = true)
printstyled("data: "; bold = true)
print(size(design.data)[1], "x", size(design.data)[2], " DataFrame")
print(size(design.data, 1), "x", size(design.data, 2), " DataFrame")
printstyled("\nweights: "; bold = true)
print_short(design.data.weights)
printstyled("\nprobs: "; bold = true)
print_short(design.data.probs)
printstyled("\nstrata: "; bold = true)
print_short(design.data.strata)
# TODO: change fpc
printstyled("\nfpc: "; bold = true)
print("\n popsize: ")
print_short(design.data.popsize)
print("\n sampsize: ")
print_short(design.data.sampsize)
print_short(design.fpc)
printstyled("\n popsize: "; bold = true)
print(design.popsize)
printstyled("\n sampsize: "; bold = true)
print(design.sampsize)
end

"""
A `ClusterSample` object holds information necessary for surveys sampled by
clustering.
ClusterSample <: AbstractSurveyDesign

Survey design sampled by clustering.
"""
struct ClusterSample <: AbstractSurveyDesign
data::DataFrame
end

# `show` method for printing information about a `ClusterSample` after construction
function Base.show(io::IO, design::ClusterSample)
printstyled("Simple Random Sample:\n")
printstyled("Cluster Sample:\n"; bold = true)
printstyled("data: "; bold = true)
print(size(design.data)[1], "x", size(design.data)[2], " DataFrame")
print(size(design.data, 1), "x", size(design.data, 2), " DataFrame")
printstyled("\nweights: "; bold = true)
print_short(design.data.weights)
printstyled("\nprobs: "; bold = true)
print_short(design.data.probs)
# TODO: change fpc
printstyled("\nfpc: "; bold = true)
print("\n popsize: ")
print_short(design.data.popsize)
print("\n sampsize: ")
print_short(design.data.sampsize)
print_short(design.fpc)
printstyled("\n popsize: "; bold = true)
print(design.popsize)
printstyled("\n sampsize: "; bold = true)
print(design.sampsize)
end
22 changes: 5 additions & 17 deletions src/dimnames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
Get the dimensions of a `SurveyDesign`.

```jldoctest
julia> using Survey

julia> apisrs = load_data("apisrs");

julia> srs = SimpleRandomSample(apisrs);

julia> dim(srs)
(200, 44)
(200, 42)
```
"""
dim(design::AbstractSurveyDesign) = size(design.data)
Expand All @@ -19,8 +17,6 @@ dim(design::AbstractSurveyDesign) = size(design.data)
Method for `svydesign` object.

```jldoctest
julia> using Survey

julia> apistrat = load_data("apistrat");

julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc);
Expand All @@ -36,14 +32,12 @@ dim(design::svydesign) = size(design.variables)
Get the column names of a `SurveyDesign`.

```jldoctest
julia> using Survey

julia> apisrs = load_data("apisrs");

julia> srs = SimpleRandomSample(apisrs);

julia> colnames(srs)
44-element Vector{String}:
42-element Vector{String}:
"Column1"
"cds"
"stype"
Expand All @@ -55,15 +49,15 @@ julia> colnames(srs)
"cname"
"cnum"
"avg.ed"
"full"
"emer"
"enroll"
"api.stu"
"pw"
"fpc"
"weights"
"probs"
"popsize"
"sampsize"
```
"""
colnames(design::AbstractSurveyDesign) = names(design.data)
Expand All @@ -72,8 +66,6 @@ colnames(design::AbstractSurveyDesign) = names(design.data)
Method for `svydesign` objects.

```jldoctest
julia> using Survey

julia> apistrat = load_data("apistrat");

julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc);
Expand Down Expand Up @@ -109,16 +101,14 @@ colnames(design::svydesign) = names(design.variables)
Get the names of the rows and columns of a `SurveyDesign`.

```jldoctest
julia> using Survey

julia> apisrs = load_data("apisrs");

julia> srs = SimpleRandomSample(apisrs);

julia> dimnames(srs)
2-element Vector{Vector{String}}:
["1", "2", "3", "4", "5", "6", "7", "8", "9", "10" … "191", "192", "193", "194", "195", "196", "197", "198", "199", "200"]
["Column1", "cds", "stype", "name", "sname", "snum", "dname", "dnum", "cname", "cnum" … "full", "emer", "enroll", "api.stu", "pw", "fpc", "weights", "probs", "popsize", "sampsize"]
["Column1", "cds", "stype", "name", "sname", "snum", "dname", "dnum", "cname", "cnum" … "grad.sch", "avg.ed", "full", "emer", "enroll", "api.stu", "pw", "fpc", "weights", "probs"]
```
"""
dimnames(design::AbstractSurveyDesign) = [string.(1:size(design.data, 1)), names(design.data)]
Expand All @@ -127,8 +117,6 @@ dimnames(design::AbstractSurveyDesign) = [string.(1:size(design.data, 1)), names
Method for `svydesign` objects.

```jldoctest
julia> using Survey

julia> apistrat = load_data("apistrat");

julia> dstrat = svydesign(data = apistrat, id = :1, strata = :stype, weights = :pw, fpc = :fpc);
Expand Down
2 changes: 0 additions & 2 deletions src/svyboxplot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ The keyword arguments are all the arguments that can be passed to `mapping` in
[AlgebraOfGraphics](https://docs.juliahub.com/AlgebraOfGraphics/CHIaw/0.4.7/).

```@example svyboxplot
julia> using survey

julia> apisrs = load_data("apisrs");

julia> srs = SimpleRandomSample(apisrs);
Expand Down
10 changes: 4 additions & 6 deletions src/svydesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,17 @@
The `svydesign` object combines a data frame and all the survey design information needed to analyse it.

```jldoctest
julia> using Survey;

julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = svydesign(id= :dnum, weights= :pw, data = apiclus1, fpc= :fpc) |> print
Survey Design:
variables: 183x45 DataFrame
id: dnum
strata: 1, 1, 1 ... (length = 183)
probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... (length = 183)
strata: 1, 1, 1 ... 1
probs: 0.029544719150814778, 0.029544719150814778, 0.029544719150814778 ... 0.029544719150814778
fpc:
popsize: 757, 757, 757 ... (length = 183)
sampsize: 183, 183, 183 ... (length = 183)
popsize: 757, 757, 757 ... 757
sampsize: 183, 183, 183 ... 183
nest: false
check_strat: true
```
Expand Down
2 changes: 0 additions & 2 deletions src/svyglm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ svyglm(formula, design, dist, link)
The `svyglm` function can be used to fit generalized linear models (GLMs) on a `svydesign` object.

```jldoctest
julia> using Survey

julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = svydesign(id=:dnum, weights=:pw, data = apiclus1);
Expand Down
2 changes: 0 additions & 2 deletions src/svyhist.jl
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ The `weights` argument should be a `Symbol` specifying a design variable.
For the complete argument list see [Makie.hist](https://makie.juliaplots.org/stable/examples/plotting_functions/hist/).

```julia
julia> using Survey

julia> apisrs = load_data("apisrs");

julia> srs = SimpleRandomSample(apisrs);
Expand Down
Loading