From 140eb889ea3023bdb8f78d2eadc1b97c0516ab51 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Fri, 30 Dec 2022 22:34:46 +0530
Subject: [PATCH 01/80] Add general survey design

---
 docs/src/api.md      |  2 +-
 src/Survey.jl        |  4 +-
 src/SurveyDesign.jl  | 91 +++++++++++++++++++++-----------------------
 src/bootstrap.jl     | 40 ++++++++++++-------
 src/jackknife.jl     |  2 +-
 src/mean.jl          | 16 ++++++--
 src/ratio.jl         |  4 +-
 src/show.jl          |  8 ++--
 src/total.jl         | 10 ++---
 test/SurveyDesign.jl |  6 +--
 test/bootstrap.jl    |  2 +-
 test/jackknife.jl    |  2 +-
 test/mean.jl         |  2 +-
 test/ratio.jl        |  2 +-
 test/total.jl        |  2 +-
 15 files changed, 105 insertions(+), 88 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 5554b427..062d379d 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -27,7 +27,7 @@ mean(x::Symbol, by::Symbol, design::SimpleRandomSample)
 total(x::Symbol, by::Symbol, design::SimpleRandomSample) 
 ```
 ```@docs
-ratio(variable_num:: Symbol, variable_den:: Symbol, design::OneStageClusterSample)
+ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign)
 plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 hist(design::AbstractSurveyDesign, var::Symbol,
diff --git a/src/Survey.jl b/src/Survey.jl
index 086e11ef..8854e1d3 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -28,11 +28,11 @@ include("ratio.jl")
 
 export load_data
 export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample
-export OneStageClusterSample
+export SurveyDesign
 export dim, colnames, dimnames
 export mean, total, quantile
 export plot
-export hist, sturges, freedman_diaconis
+export hist
 export boxplot
 export Bootstrap
 export jkknife
diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 30770b77..a17655c0 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -323,17 +323,17 @@ struct StratifiedSample <: AbstractSurveyDesign
 end
 
 """
-    OneStageClusterSample <: AbstractSurveyDesign
+    SurveyDesign <: AbstractSurveyDesign
 
-Survey design sampled by one stage cluster sampling.
+Survey design sampled by one-stage cluster sampling.
 Clusters chosen by SRS followed by complete sampling of selected clusters.
-Assumes each individual in one and only one cluster; disjoint and nested clusters.
+Assumes each individual is in one and only one cluster; disjoint and nested clusters.
 
-`cluster` must be specified as a Symbol name of a column in `data`.
+`clusters` is given as the Symbol name of a column in `data`, or a vector of such names (only the first is used); if omitted, each row is treated as its own cluster.
 
 # Arguments:
 `data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`cluster::Symbol`: the stratification variable - must be given as a column in `data`.
+`clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing`: the clustering variable(s) - must be given as column(s) in `data`.
 `popsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=nothing`: the (expected) survey population size. For 
 
 `weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing`: the sampling weights.
@@ -343,11 +343,11 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
-OneStageClusterSample:
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize = :fpc)
+SurveyDesign:
 data: 183x45 DataFrame
-cluster: dnum
-design.data[!,design.cluster]: 637, 637, 637, ..., 448
+clusters: dnum
+design.data[!,design.clusters]: 637, 637, 637, ..., 448
 popsize: fpc
 design.data[!,design.popsize]: 757, 757, 757, ..., 757
 sampsize: sampsize
@@ -362,11 +362,11 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum; weights=:pw)
-OneStageClusterSample:
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
+SurveyDesign:
 data: 183x46 DataFrame
-cluster: dnum
-design.data[!,design.cluster]: 637, 637, 637, ..., 448
+clusters: dnum
+design.data[!,design.clusters]: 637, 637, 637, ..., 448
 popsize: popsize
 design.data[!,design.popsize]: 757.0, 757.0, 757.0, ..., 757.0
 sampsize: sampsize
@@ -378,53 +378,48 @@ design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
 design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
 ```
 """
-struct OneStageClusterSample <: AbstractSurveyDesign
+struct SurveyDesign <: AbstractSurveyDesign
     data::AbstractDataFrame
     cluster::Symbol
     popsize::Symbol
     sampsize::Symbol
-    weights::Symbol
+    strata::Symbol
     pps::Bool
-    has_strata::Bool
-    # Single stage cluster sample, like apiclus1
-    function OneStageClusterSample(data::AbstractDataFrame, cluster::Symbol, popsize::Symbol; kwargs...) # Right now kwargs does nothing, for expansion
+    # Single stage clusters sample, like apiclus1
+    function SurveyDesign(data::AbstractDataFrame; strata::Union{Nothing,Symbol} = nothing, weights::Union{Nothing,Symbol}= nothing, clusters::Union{Nothing, Symbol, Vector{Symbol}} = nothing, popsize::Union{Nothing, Int,Symbol}=nothing) 
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
-        if !(typeof(data[!, popsize]) <: Vector{<:Real})
-            error(string("given popsize column ", popsize , " is not of numeric type"))
+        if typeof(strata) <:Nothing
+            data.false_strata = repeat(["FALSE_STRATA"], nrow(data))
+            strata = :false_strata
+        end
+        if typeof(clusters) <: Nothing
+            data.false_cluster = 1:nrow(data)
+            cluster = :false_cluster
+        end
+        ## Single stage approximation
+        if typeof(clusters) <: Vector{Symbol}
+            cluster = first(clusters)
         end
-        if !all(w -> w == first(data[!, popsize]), data[!, popsize])
-            error("popsize must be same for all observations within the cluster in ClusterSample")
+        if typeof(clusters) <: Symbol
+            cluster = clusters
         end
         # For one-stage sample only one sampsize vector
         sampsize_labels = :sampsize
-        data_groupedby_cluster = groupby(data, cluster)
-        data[!, sampsize_labels] = fill(size(data_groupedby_cluster, 1),(nrow(data),))
-        weights = :weights
-        data[!, :weights] = data[!, popsize] ./ data[!, sampsize_labels]
-        data[!, :probs] = 1 ./ data[!, weights] # Many formulae are easily defined in terms of sampling probabilties
-        data[!, :allprobs] = data[!, :probs] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
-        data[!, :strata] = ones(nrow(data))
-        pps = false
-        has_strata = false
-        new(data, cluster, popsize, sampsize_labels, weights ,pps, has_strata)
-    end
-    # Single stage cluster sample, like apiclus1
-    function OneStageClusterSample(data::AbstractDataFrame, cluster::Symbol; weights::Symbol=nothing, kwargs...) # Right now kwargs does nothing, for expansion
-        # sampsize here is number of clusters completely sampled, popsize is total clusters in population
-        if !(typeof(data[!, weights]) <: Vector{<:Real})
-            error(string("given weights column ", weights , " is not of numeric type"))
+        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])),(nrow(data),))
+        if !(typeof(popsize) <: Nothing)
+            data[!, :weights] = data[!, popsize] ./ data[!, sampsize_labels]
+        elseif !(typeof(weights) <: Nothing)
+            data.weights = data[!, weights]
+        else
+            data.weights = repeat([1], nrow(data))
         end
-        sampsize_labels = :sampsize
-        data_groupedby_cluster = groupby(data, cluster)
-        data[!, sampsize_labels] = fill(size(data_groupedby_cluster, 1),(nrow(data),))
-        popsize = :popsize
-        data[!, popsize] = data[!, weights] .* data[!, sampsize_labels]
-        data[!, :probs] = 1 ./ data[!, weights] # Many formulae are easily defined in terms of sampling probabilties
-        data[!, :weights] = data[!, weights]
+        data[!, :probs] = 1 ./ data[!, :weights] # Many formulae are easily defined in terms of sampling probabilties
         data[!, :allprobs] = data[!, :probs] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
-        data[!, :strata] = ones(nrow(data))
         pps = false
-        has_strata = false
-        new(data, cluster, popsize, sampsize_labels, weights, pps, has_strata)
+        if !(typeof(popsize) <: Symbol)
+            data.popsize = repeat([sum(data.weights)], nrow(data))
+            popsize = :popsize
+        end
+        new(data, cluster, popsize, sampsize_labels, strata, pps)
     end
 end
\ No newline at end of file
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 27c84a73..1320cd51 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -1,7 +1,7 @@
 struct Bootstrap 
     replicates
     rng
-    function Bootstrap(; replicates = 100, rng = MersenneTwister(111))
+    function Bootstrap(; replicates = 1000, rng = MersenneTwister(111))
         new(replicates, rng)
     end
 end
@@ -12,7 +12,7 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
 
 julia> rng = MersenneTwister(111); 
 
@@ -27,18 +27,32 @@ julia> Survey.bootstrap(:api00, dclus1, func; replicates=1000, rng)
 
 ```
 """
-function bootstrap(x::Symbol, design::OneStageClusterSample, func = wsum; replicates = 100, rng = MersenneTwister(1234))
-    gdf = groupby(design.data, design.cluster)
-    psus = unique(design.data[!, design.cluster])
-    nh = length(psus)
+function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 100, rng = MersenneTwister(1234))
     X = func(design.data[:, x], design.data.weights)
+    H = length(unique(design.data[!, design.strata]))
+    stratified = groupby(design.data, design.strata)
     Xt = Array{Float64, 1}(undef, replicates)
     for i in 1:replicates
-        selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
-        xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
-        whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
-        Xt[i] = func(xhij, whij)
-    end 
-    variance = sum((Xt .- X).^2) / replicates
+        Xh = []
+        Wh = []
+        for j in 1:H
+            substrata = DataFrame(stratified[j])
+            psus = unique(substrata[!, design.cluster])
+            if length(psus) == 1
+                return DataFrame(statistic = X, SE = 0)
+            end
+            nh = length(psus)
+            gdf = groupby(substrata, design.cluster)
+            selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
+            xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
+            whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
+            append!(Xh, xhij)
+            append!(Wh, whij)
+        end
+        Xh = Float64.(Xh)
+        Wh = Float64.(Wh)
+        Xt[i] = func(Xh, Wh)
+        end 
+        variance = sum((Xt .- X).^2) / replicates
     return DataFrame(statistic = X, SE = sqrt(variance))
-end
+end
\ No newline at end of file
diff --git a/src/jackknife.jl b/src/jackknife.jl
index 2d0a656d..794ef10b 100644
--- a/src/jackknife.jl
+++ b/src/jackknife.jl
@@ -1,4 +1,4 @@
-function jkknife(variable:: Symbol, design::OneStageClusterSample ,func:: Function;  params =[])
+function jkknife(variable:: Symbol, design::SurveyDesign ,func:: Function;  params =[])
     statistic = func(design.data[!,variable],params...)
     nh = length(unique(design.data[!,design.cluster]))
     newv = []
diff --git a/src/mean.jl b/src/mean.jl
index c54cc337..d56c815d 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -6,7 +6,7 @@ Estimate the population mean of a variable of a simple random sample, and the co
 The calculations were done according to the book [Sampling Techniques](https://www.academia.edu/29684662/Cochran_1977_Sampling_Techniques_Third_Edition)
 by William Cochran.
 
-For OneStageClusterSample, formula adapted from Sarndal pg129, section 4.2.2 Simple Random Cluster Sampling
+For SurveyDesign, formula adapted from Sarndal pg129, section 4.2.2 Simple Random Cluster Sampling
 
 ```jldoctest
 julia> apisrs = load_data("apisrs");
@@ -92,7 +92,7 @@ function mean(x::Symbol, design::StratifiedSample)
     return DataFrame(mean=Ȳ̂, SE=SE)
 end
 
-function mean(x::Symbol, design::OneStageClusterSample)
+function mean(x::Symbol, design::SurveyDesign)
     ## Based on logical translation of corresponding in total.jl
     ## Not quite same from R as it rounds of `total`, so division results in difference
     # > svymean(~api00,dclus1)
@@ -190,7 +190,7 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
 
 julia> mean(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(111)))
 1×2 DataFrame
@@ -200,8 +200,16 @@ julia> mean(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(1
    1 │ 644.169  23.0897
 ```
 """
-function mean(x::Symbol, design::OneStageClusterSample, method::Bootstrap)
+function mean(x::Symbol, design::SurveyDesign, method::Bootstrap)
     weighted_mean(x, w) = mean(x, weights(w))
     df = bootstrap(x, design, weighted_mean; method.replicates, method.rng)
     df = rename(df, :statistic => :mean)
+end
+
+function mean(x::Symbol, by::Symbol, design::SurveyDesign, method::Bootstrap)
+    gdf = groupby(design.data, by)
+    subdesigns = [SurveyDesign(gdf[i]; strata = design.strata, weights = :weights, clusters = design.cluster) for i in 1:length(gdf)]    
+    df = vcat([mean(x, subdesign, method) for subdesign in subdesigns]...)
+    df[!, by] = [first(gdf[i][!, by]) for i in 1:length(gdf)]
+    return df
 end
\ No newline at end of file
diff --git a/src/ratio.jl b/src/ratio.jl
index c6050635..4a75e9aa 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -9,7 +9,7 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc);
+julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc);
 
 julia> ratio(:api00, :enroll, dclus1)
 1×2 DataFrame
@@ -19,7 +19,7 @@ julia> ratio(:api00, :enroll, dclus1)
    1 │   1.17182  0.151242
 ```
 """
-function ratio(variable_num:: Symbol, variable_den:: Symbol, design::OneStageClusterSample)
+function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign)
     statistic = wsum(design.data[!,variable_num],design.data.weights)/wsum(design.data[!,variable_den],design.data.weights)
     nh = length(unique(design.data[!,design.cluster]))
     newv = []
diff --git a/src/show.jl b/src/show.jl
index 1bc9989a..a912e723 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -49,7 +49,7 @@ function Base.show(io::IO, ::MIME"text/plain", design::StratifiedSample)
 end
 
 "Print information about a survey design."
-function Base.show(io::IO, ::MIME"text/plain", design::OneStageClusterSample)
+function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
     printstyled(io, "data: "; bold=true)
@@ -60,9 +60,9 @@ function Base.show(io::IO, ::MIME"text/plain", design::OneStageClusterSample)
     printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
     printinfo(io, "sampsize", string(design.sampsize); newline=true)
     printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "weights", string(design.weights); newline=true)
-    printinfo(io, "design.data[!,design.weights]", makeshort(design.data[!,design.weights]))
-    printinfo(io, "design.data[!,:strata]", makeshort(design.data[!,:strata]))
+    # printinfo(io, "weights", string(design.weights); newline=true)
+    # printinfo(io, "design.data[!,design.weights]", makeshort(design.data[!,design.weights]))
+    # printinfo(io, "design.data[!,:strata]", makeshort(design.data[!,:strata]))
     printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
     printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
 end
\ No newline at end of file
diff --git a/src/total.jl b/src/total.jl
index 087c6123..f9824255 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -3,7 +3,7 @@
 
 Estimate the population total for the variable specified by `x`.
 
-For OneStageClusterSample, formula adapted from Sarndal pg129, section 4.2.2 Simple Random Cluster Sampling
+For SurveyDesign, formula adapted from Sarndal pg129, section 4.2.2 Simple Random Cluster Sampling
 
 ```jldoctest
 julia> using Survey;
@@ -94,7 +94,7 @@ julia> using Survey
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
 
 julia> total(:api00, dclus1)
 1×2 DataFrame
@@ -104,7 +104,7 @@ julia> total(:api00, dclus1)
    1 │ 5.94916e6  1.33948e6
 ```
 """
-function total(x::Symbol, design::OneStageClusterSample)
+function total(x::Symbol, design::SurveyDesign)
     gdf = groupby(design.data, design.cluster)
     ŷₜ = combine(gdf, x => sum => :sum).sum
     Nₜ = first(design.data[!,design.popsize])
@@ -157,7 +157,7 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
 
 julia> total(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(111)))
 1×2 DataFrame
@@ -167,7 +167,7 @@ julia> total(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(
    1 │ 5.94916e6  1.36593e6
 ```
 """
-function total(x::Symbol, design::OneStageClusterSample, method::Bootstrap)
+function total(x::Symbol, design::SurveyDesign, method::Bootstrap)
     df = bootstrap(x, design, wsum; method.replicates, method.rng)
     df = rename(df, :statistic => :total)
 end
\ No newline at end of file
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index 0ec15b5d..0f3bc796 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -165,14 +165,14 @@ end
     @test_throws ErrorException StratifiedSample(apistrat, :stype; popsize= :pw, sampsize=:fpc) 
 end
 
-@testset "OneStageClusterSample" begin
+@testset "SurveyDesign" begin
     # Load API datasets
     apiclus1_original = load_data("apiclus1")
     apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
     ##############################
     # one-stage cluster sample with popsize
     apiclus1 = copy(apiclus1_original)
-    dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
     @test dclus1.data[!,dclus1.weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
     @test dclus1.data[!,dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
     @test dclus1.data[!,:allprobs] ≈ dclus1.data[!,:probs] atol = 1e-4
@@ -180,7 +180,7 @@ end
     ##############################
     # one-stage cluster sample with weights
     apiclus1 = copy(apiclus1_original)
-    dclus1 = OneStageClusterSample(apiclus1, :dnum; weights=:pw)
+    dclus1 = SurveyDesign(apiclus1, :dnum; weights=:pw)
     @test dclus1.data[!,dclus1.weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
     @test dclus1.data[!,dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
     @test dclus1.data[!,:allprobs] ≈ dclus1.data[!,:probs] atol = 1e-4
diff --git a/test/bootstrap.jl b/test/bootstrap.jl
index 7c9778eb..e49c1b3f 100644
--- a/test/bootstrap.jl
+++ b/test/bootstrap.jl
@@ -1,6 +1,6 @@
 using Random, StatsBase
 apiclus1 = load_data("apiclus1")
-dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc); 
+dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
 rng = MersenneTwister(111); 
 func = wsum; 
 est = Survey.bootstrap(:api00, dclus1, func; replicates=1000, rng)
diff --git a/test/jackknife.jl b/test/jackknife.jl
index 85ca3261..73e90f78 100644
--- a/test/jackknife.jl
+++ b/test/jackknife.jl
@@ -4,7 +4,7 @@
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
     @test jkknife(:api00,dclus1, mean).SE[1] ≈ 26.5997 atol = 1e-4
     @test jkknife(:api00, dclus1, mean).Statistic[1] ≈ 644.1693 atol = 1e-4
 end
diff --git a/test/mean.jl b/test/mean.jl
index bbc1d952..d2fbd8a2 100644
--- a/test/mean.jl
+++ b/test/mean.jl
@@ -80,7 +80,7 @@ end
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
 
     @test mean(:api00,dclus1, Bootstrap()).mean[1] ≈ 644.17 atol = 1
     @test mean(:api00,dclus1, Bootstrap(replicates = 10000)).SE[1] ≈ 23.779 atol = 0.5 # without fpc as it hasn't been figured out for bootstrap. 
diff --git a/test/ratio.jl b/test/ratio.jl
index 9ac25a1a..d198ce1b 100644
--- a/test/ratio.jl
+++ b/test/ratio.jl
@@ -4,7 +4,7 @@
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
     @test ratio(:api00, :enroll, dclus1).SE[1] ≈ 0.151242 atol = 1e-4
     @test ratio(:api00, :enroll, dclus1).Statistic[1] ≈ 1.17182 atol = 1e-4
 end
\ No newline at end of file
diff --git a/test/total.jl b/test/total.jl
index c57114d1..de19c41c 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -100,7 +100,7 @@ end
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = OneStageClusterSample(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
     @test total(:api00,dclus1).total[1] ≈ 5949162 atol = 1
     @test total(:api00,dclus1).SE[1] ≈ 1339481 atol = 1
 

From 5c7fe92f35ace80361c91db130a8141e5ce7c42c Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Sat, 31 Dec 2022 19:18:49 +0530
Subject: [PATCH 02/80] Add bootweights and domain estimation

---
 src/SurveyDesign.jl | 12 +++++++++++-
 src/bootstrap.jl    | 28 ++++++++++++++++++++++++++++
 src/mean.jl         | 37 +++++++++++++++++++++++++++++++------
 src/ratio.jl        |  4 ++++
 src/show.jl         | 21 +++++++++++++++++++++
 5 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index a17655c0..acfafa50 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -422,4 +422,14 @@ struct SurveyDesign <: AbstractSurveyDesign
         end
         new(data, cluster, popsize, sampsize_labels, strata, pps)
     end
-end
\ No newline at end of file
+end
+
+struct ReplicateDesign <: AbstractSurveyDesign
+    data::AbstractDataFrame
+    cluster::Symbol
+    popsize::Symbol
+    sampsize::Symbol
+    strata::Symbol
+    pps::Bool
+    replicates::UInt
+end
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 1320cd51..e454a82c 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -55,4 +55,32 @@ function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 10
         end 
         variance = sum((Xt .- X).^2) / replicates
     return DataFrame(statistic = X, SE = sqrt(variance))
+end
+
+function bootweights(design::SurveyDesign; replicates = 100, rng = MersenneTwister(1234))
+    H = length(unique(design.data[!, design.strata]))
+    stratified = groupby(design.data, design.strata)
+    function replicate(stratified, H)
+        for j in 1:H
+            substrata = DataFrame(stratified[j])
+            psus = unique(substrata[!, design.cluster])
+            if length(psus) == 1
+                return DataFrame(statistic = X, SE = 0)
+            end
+            nh = length(psus)
+            rh = [(count(==(i), rand(1:(nh-1), nh))) for i in 1:nh] # main bootstrap algo. 
+            gdf = groupby(substrata, design.cluster)
+            for i in 1:nh
+                gdf[i].rh = repeat([rh[i]], nrow(gdf[i]))
+            end
+            stratified[j].rh = DataFrame(gdf).rh
+        end
+        return DataFrame(stratified)
+    end
+    df = replicate(stratified, H)
+    rename!(df,:rh => :replicate_1)
+    for i in 2:(replicates)
+        df[!, "replicate_"*string(i)] = Float64.(replicate(stratified, H).rh)
+    end 
+    return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
 end
\ No newline at end of file
diff --git a/src/mean.jl b/src/mean.jl
index d56c815d..ff6e78dd 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -206,10 +206,35 @@ function mean(x::Symbol, design::SurveyDesign, method::Bootstrap)
     df = rename(df, :statistic => :mean)
 end
 
-function mean(x::Symbol, by::Symbol, design::SurveyDesign, method::Bootstrap)
-    gdf = groupby(design.data, by)
-    subdesigns = [SurveyDesign(gdf[i]; strata = design.strata, weights = :weights, clusters = design.cluster) for i in 1:length(gdf)]    
-    df = vcat([mean(x, subdesign, method) for subdesign in subdesigns]...)
-    df[!, by] = [first(gdf[i][!, by]) for i in 1:length(gdf)]
-    return df
+# function mean(x::Symbol, by::Symbol, design::SurveyDesign, method::Bootstrap)
+#     gdf = groupby(design.data, by)
+#     subdesigns = [SurveyDesign(gdf[i]; strata = design.strata, weights = :weights, clusters = design.cluster) for i in 1:length(gdf)]    
+#     df = vcat([mean(x, subdesign, method) for subdesign in subdesigns]...)
+#     df[!, by] = [first(gdf[i][!, by]) for i in 1:length(gdf)]
+#     return df
+# end
+
+function mean(x::Symbol, design::ReplicateDesign)
+    X = mean(design.data[!, x], weights(design.data.weights))
+    Xt = [mean(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
+    variance = sum((Xt .- X).^2) / design.replicates
+    DataFrame(mean = X, SE = sqrt(variance))
+end
+
+
+function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
+    gdf = groupby(design.data, domain)
+    X = combine(gdf, [x, :weights] => ((a, b) -> mean(a, weights(b))) => :mean)
+    Xt_mat = Array{Float64, 2}(undef, (length(unique(design.data[!, domain])), design.replicates))
+    for i in 1:design.replicates
+        Xt = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> mean(a, weights(b .* c))) => :mean).mean
+        for i in 1:length(Xt)
+            if isnan(Xt[i]) 
+                Xt[i] = X.mean[i] # replace lonely psu with point estimate. This needs to be corrected. 
+            end
+        end
+        Xt_mat[:, i] = Xt
+    end
+    X.SE = sqrt.(sum((Xt_mat .- X.mean).^2 / design.replicates, dims = 2))[:,1]
+    return X
 end
\ No newline at end of file
diff --git a/src/ratio.jl b/src/ratio.jl
index 4a75e9aa..8e923d42 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -36,3 +36,7 @@ function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesig
     var = c*(nh-1)/nh
     return DataFrame(Statistic = statistic, SE = sqrt(var))
 end
+
+# function ratio(x::Symbol, design::ReplicateDesign)
+#     design.data[!, "ones"] = ones(nrow(design.data))
+# end
\ No newline at end of file
diff --git a/src/show.jl b/src/show.jl
index a912e723..4adb61d8 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -65,4 +65,25 @@ function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
     # printinfo(io, "design.data[!,:strata]", makeshort(design.data[!,:strata]))
     printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
     printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+end
+
+"Print information about a replicate design."
+function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
+    type = typeof(design)
+    printstyled(io, "$type:\n"; bold=true)
+    printstyled(io, "data: "; bold=true)
+    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
+    printinfo(io, "cluster", string(design.cluster); newline=true)
+    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
+    printinfo(io, "popsize", string(design.popsize); newline=true)
+    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
+    printinfo(io, "sampsize", string(design.sampsize); newline=true)
+    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
+    # printinfo(io, "weights", string(design.weights); newline=true)
+    # printinfo(io, "design.data[!,design.weights]", makeshort(design.data[!,design.weights]))
+    # printinfo(io, "design.data[!,:strata]", makeshort(design.data[!,:strata]))
+    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
+    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+    printstyled(io, "replicates: "; bold=true)
+    println(io, design.replicates)
 end
\ No newline at end of file

From 5c9ec3106743f13e28c654d30d2ae5b84d569f07 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Sun, 1 Jan 2023 11:22:30 +0530
Subject: [PATCH 03/80] Remove other mean functions.

---
 src/bootstrap.jl |   2 +-
 src/mean.jl      | 223 +++--------------------------------------------
 2 files changed, 14 insertions(+), 211 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index e454a82c..2662a7a6 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -68,7 +68,7 @@ function bootweights(design::SurveyDesign; replicates = 100, rng = MersenneTwist
                 return DataFrame(statistic = X, SE = 0)
             end
             nh = length(psus)
-            rh = [(count(==(i), rand(1:(nh-1), nh))) for i in 1:nh] # main bootstrap algo. 
+            rh = [(count(==(i), rand(rng, 1:(nh-1), nh))) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
             for i in 1:nh
                 gdf[i].rh = repeat([rh[i]], nrow(gdf[i]))
diff --git a/src/mean.jl b/src/mean.jl
index ff6e78dd..f19ee1f5 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -1,189 +1,3 @@
-"""
-    mean(x, design)
-
-Estimate the population mean of a variable of a simple random sample, and the corresponding standard error.
-
-The calculations were done according to the book [Sampling Techniques](https://www.academia.edu/29684662/Cochran_1977_Sampling_Techniques_Third_Edition)
-by William Cochran.
-
-For SurveyDesign, formula adapted from Sarndal pg129, section 4.2.2 Simple Random Cluster Sampling
-
-```jldoctest
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
-
-julia> mean(:enroll, srs)
-1×2 DataFrame
- Row │ mean     SE     
-     │ Float64  Float64 
-─────┼──────────────────
-   1 │  584.61  27.3684
-
-julia> mean([:api00, :api99], srs)
-2×3 DataFrame
- Row │ names   mean     SE     
-     │ String  Float64  Float64 
-─────┼──────────────────────────
-   1 │ api00   656.585  9.24972
-   2 │ api99   624.685  9.5003
-
-julia> strat = load_data("apistrat"); 
-
-julia> dstrat = StratifiedSample(strat, :stype; popsize  = :fpc); 
-
-julia> mean(:api00, dstrat)
-1×2 DataFrame
- Row │ mean     SE     
-     │ Float64  Float64 
-─────┼──────────────────
-   1 │ 662.287  9.40894
-```
-"""
-function mean(x::Symbol, design::SimpleRandomSample)
-    function se(x::Symbol, design::SimpleRandomSample)
-        variance = design.fpc * Statistics.var(design.data[!, x]) / design.sampsize 
-        return sqrt(variance)
-    end
-    if isa(design.data[!, x], CategoricalArray)
-        gdf = groupby(design.data, x)
-        p = combine(gdf, nrow => :counts)
-        p.mean = p.counts ./ sum(p.counts)
-        # variance of proportion
-        p.var = design.fpc .* p.mean .* (1 .- p.mean) ./ (design.sampsize - 1)
-        p.se = sqrt.(p.var)
-        return select(p, Not([:counts, :var]))
-    end
-    return DataFrame(mean=mean(design.data[!, x]), SE=se(x, design))
-end
-
-function mean(x::Vector{Symbol}, design::SimpleRandomSample)
-    df = reduce(vcat, [mean(i, design) for i in x])
-    insertcols!(df, 1, :names => String.(x))
-    return df
-end
-
-function mean(x::Symbol, design::StratifiedSample)
-    if x == design.strata
-        gdf = groupby(design.data, x)
-        p = combine(gdf, :weights => sum => :Nₕ)
-        p.Wₕ = p.Nₕ ./ sum(p.Nₕ)
-        p = select!(p, Not(:Nₕ))
-        return p
-    elseif isa(design.data[!, x], CategoricalArray)
-        gdf = groupby(design.data, x)
-        p = combine(gdf, nrow => :counts)
-        p.proportion = p.counts ./ sum(p.counts)
-        # variance of proportion
-        p.var = design.fpc .* p.proportion .* (1 .- p.proportion) ./ (design.sampsize - 1)
-        p.SE = sqrt.(p.var)
-        return p
-    end
-    gdf = groupby(design.data, design.strata)
-    ȳₕ = combine(gdf, x => mean => :mean).mean
-    Nₕ = combine(gdf, :weights => sum => :Nₕ).Nₕ
-    nₕ = combine(gdf, nrow => :nₕ).nₕ
-    fₕ = nₕ ./ Nₕ
-    Wₕ = Nₕ ./ sum(Nₕ)
-    Ȳ̂ = sum(Wₕ .* ȳₕ)
-    s²ₕ = combine(gdf, x => var => :s²h).s²h
-    V̂Ȳ̂ = sum((Wₕ .^ 2) .* (1 .- fₕ) .* s²ₕ ./ nₕ)
-    SE = sqrt(V̂Ȳ̂)
-    return DataFrame(mean=Ȳ̂, SE=SE)
-end
-
-function mean(x::Symbol, design::SurveyDesign)
-    ## Based on logical translation of corresponding in total.jl
-    ## Not quite same from R as it rounds of `total`, so division results in difference
-    # > svymean(~api00,dclus1)
-    #     mean     SE
-    # api00 644.17 23.542
-    gdf = groupby(design.data, design.cluster)
-    ȳₜ = combine(gdf, x => mean => :mean).mean
-    Nₜ = first(design.data[!,design.popsize])
-    nₜ = first(design.data[!,design.sampsize])
-    Ȳ = mean(ȳₜ)
-    s²ₜ = var(ȳₜ)
-    VȲ = (1 - nₜ/Nₜ) * s²ₜ / nₜ
-    return DataFrame(mean = Ȳ, SE = sqrt(VȲ))
-end
-
-"""
-    mean(x, by, design)
-
-Estimate the subpopulation mean of a variable `x`.
-
-The calculations were done according to the book [Model-Assisted Survey Sampling](https://link.springer.com/book/9780387406206)
-by Carl-Erik Sarndal, Bengt Swensson, Jan Wretman, section 3.3 and Chap 10. Assumes popsize is known and subpopulation size is unknown.
-
-```jldoctest
-julia> using Survey; 
-
-julia> srs = load_data("apisrs"); 
-
-julia> srs = SimpleRandomSample(srs; popsize = :fpc);
-
-julia> mean(:api00, :cname, srs) |> first
-DataFrameRow
- Row │ cname     mean     SE     
-     │ String15  Float64  Float64 
-─────┼────────────────────────────
-   1 │ Kern        573.6  42.8026
-
-julia> strat = load_data("apistrat");
-
-julia> dstrat = StratifiedSample(strat, :stype; popsize  = :fpc);
-
-julia> mean(:api00, :cname, dstrat) |> first 
-DataFrameRow
- Row │ cname        mean     SE      
-     │ String15     Float64  Float64 
-─────┼───────────────────────────────
-   1 │ Los Angeles  633.511  21.3912
-```
-"""
-function mean(x::Symbol, by::Symbol, design::SimpleRandomSample) 
-    function domain_mean(x::AbstractVector, design::SimpleRandomSample, weights)
-        function se(x::AbstractVector, design::SimpleRandomSample)
-            nd = length(x)  # domain size
-            n = design.sampsize
-            fpc = design.fpc
-            variance = (nd / n)^(-2) / n * fpc * ((nd - 1) / (n - 1)) * var(x)
-            return sqrt(variance)
-        end
-        return DataFrame(mean=Statistics.mean(x), SE=se(x, design))
-    end
-    gdf = groupby(design.data, by)
-    combine(gdf, [x, :weights] => ((a, b) -> domain_mean(a, design, b)) => AsTable)
-end
-
-function mean(x::Symbol, by::Symbol, design::StratifiedSample)
-    function domain_mean(x::AbstractVector, popsize::AbstractVector, sampsize::AbstractVector, sampfraction::AbstractVector, strata::AbstractVector)
-        df = DataFrame(x=x, popsize=popsize, sampsize=sampsize, sampfraction=sampfraction, strata=strata)
-        function calculate_components(x, popsize, sampsize, sampfraction)
-            return DataFrame(
-                nsdh = length(x),
-                nsh = length(x),
-                substrata_domain_totals = sum(x),
-                ȳsdh = mean(x),
-                Nh = first(popsize),
-                nh = first(sampsize),
-                fh = first(sampfraction),
-                sigma_ȳsh_squares = sum((x .- mean(x)).^2)
-                )
-        end
-        components = combine(groupby(df, :strata), [:x, :popsize, :sampsize, :sampfraction] => calculate_components => AsTable)
-        domain_mean = sum(components.Nh .* components.substrata_domain_totals ./ components.nh) / sum(components.Nh .* components.nsdh ./ components.nh)
-        pdh = components.nsdh ./ components.nh
-        N̂d = sum(components.Nh .* pdh)
-        domain_var = sum(components.Nh .^ 2 .* (1 .- components.fh) .* (components.sigma_ȳsh_squares .+ (components.nsdh .* (1 .- pdh) .* (components.ȳsdh .- domain_mean) .^ 2)) ./ (components.nh .* (components.nh .- 1))) ./ N̂d .^ 2
-        domain_mean_se = sqrt(domain_var)
-        return DataFrame(mean=domain_mean, SE=domain_mean_se)
-    end
-    gdf_domain = groupby(design.data, by)
-    combine(gdf_domain, [x, :popsize,:sampsize,:sampfraction, design.strata] => domain_mean => AsTable)
-end
-
 """
 ```jldoctest
 julia> using Survey, Random, StatsBase; 
@@ -192,7 +6,9 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
 
-julia> mean(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(111)))
+julia> bclus1 = bootweights(dclus1; replicates = 1000);
+
+julia> mean(:api00, bclus1)
 1×2 DataFrame
  Row │ mean     SE      
      │ Float64  Float64 
@@ -200,20 +16,6 @@ julia> mean(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(1
    1 │ 644.169  23.0897
 ```
 """
-function mean(x::Symbol, design::SurveyDesign, method::Bootstrap)
-    weighted_mean(x, w) = mean(x, weights(w))
-    df = bootstrap(x, design, weighted_mean; method.replicates, method.rng)
-    df = rename(df, :statistic => :mean)
-end
-
-# function mean(x::Symbol, by::Symbol, design::SurveyDesign, method::Bootstrap)
-#     gdf = groupby(design.data, by)
-#     subdesigns = [SurveyDesign(gdf[i]; strata = design.strata, weights = :weights, clusters = design.cluster) for i in 1:length(gdf)]    
-#     df = vcat([mean(x, subdesign, method) for subdesign in subdesigns]...)
-#     df[!, by] = [first(gdf[i][!, by]) for i in 1:length(gdf)]
-#     return df
-# end
-
 function mean(x::Symbol, design::ReplicateDesign)
     X = mean(design.data[!, x], weights(design.data.weights))
     Xt = [mean(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
@@ -224,17 +26,18 @@ end
 
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
     gdf = groupby(design.data, domain)
+    nd = length(unique(design.data[!, domain]))
     X = combine(gdf, [x, :weights] => ((a, b) -> mean(a, weights(b))) => :mean)
-    Xt_mat = Array{Float64, 2}(undef, (length(unique(design.data[!, domain])), design.replicates))
+    Xt_mat = Array{Float64, 2}(undef, (nd, design.replicates))
     for i in 1:design.replicates
-        Xt = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> mean(a, weights(b .* c))) => :mean).mean
-        for i in 1:length(Xt)
-            if isnan(Xt[i]) 
-                Xt[i] = X.mean[i] # replace lonely psu with point estimate. This needs to be corrected. 
-            end
-        end
-        Xt_mat[:, i] = Xt
+        Xt_mat[:, i] = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> mean(a, weights(b .* c))) => :mean).mean
+    end
+    ses = []
+    for i in 1:nd
+        filtered_dx = filter(!isnan, Xt_mat[i, :] .- X.mean[i])
+        push!(ses, sqrt(sum(filtered_dx.^2) / length(filtered_dx)))
     end
-    X.SE = sqrt.(sum((Xt_mat .- X.mean).^2 / design.replicates, dims = 2))[:,1]
+    X.SE = ses
+    # X.SE = sqrt.(sum((Xt_mat .- X.mean).^2 / design.replicates, dims = 2))[:,1]
     return X
 end
\ No newline at end of file

From 62dc3fd8023f5eb1e5805ca38814e83fe127d171 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Sun, 1 Jan 2023 17:36:37 +0530
Subject: [PATCH 04/80] Fix bug in bootstrap

---
 src/bootstrap.jl | 68 ++++++++++++------------------------------------
 1 file changed, 17 insertions(+), 51 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 2662a7a6..70df8a81 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -1,63 +1,28 @@
-struct Bootstrap 
-    replicates
-    rng
-    function Bootstrap(; replicates = 1000, rng = MersenneTwister(111))
-        new(replicates, rng)
-    end
-end
-
 """
 ```jldoctest
-julia> using Survey, Random, StatsBase; 
+julia> using Survey, Random;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum); 
 
 julia> rng = MersenneTwister(111); 
 
-julia> func = wsum; 
-
-julia> Survey.bootstrap(:api00, dclus1, func; replicates=1000, rng) 
-1×2 DataFrame
- Row │ statistic  SE        
-     │ Float64    Float64   
-─────┼──────────────────────
-   1 │ 5.94916e6  1.36593e6
-
+julia> Survey.bootweights(dclus1; replicates=1000, rng) 
+Survey.ReplicateDesign:
+data: 183x1046 DataFrame
+cluster: dnum
+design.data[!,design.cluster]: 637, 637, 637, ..., 448
+popsize: popsize
+design.data[!,design.popsize]: 183, 183, 183, ..., 183
+sampsize: sampsize
+design.data[!,design.sampsize]: 15, 15, 15, ..., 15
+design.data[!,:probs]: 1.0, 1.0, 1.0, ..., 1.0
+design.data[!,:allprobs]: 1.0, 1.0, 1.0, ..., 1.0
+replicates: 1000
 ```
 """
-function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 100, rng = MersenneTwister(1234))
-    X = func(design.data[:, x], design.data.weights)
-    H = length(unique(design.data[!, design.strata]))
-    stratified = groupby(design.data, design.strata)
-    Xt = Array{Float64, 1}(undef, replicates)
-    for i in 1:replicates
-        Xh = []
-        Wh = []
-        for j in 1:H
-            substrata = DataFrame(stratified[j])
-            psus = unique(substrata[!, design.cluster])
-            if length(psus) == 1
-                return DataFrame(statistic = X, SE = 0)
-            end
-            nh = length(psus)
-            gdf = groupby(substrata, design.cluster)
-            selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
-            xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
-            whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
-            append!(Xh, xhij)
-            append!(Wh, whij)
-        end
-        Xh = Float64.(Xh)
-        Wh = Float64.(Wh)
-        Xt[i] = func(Xh, Wh)
-        end 
-        variance = sum((Xt .- X).^2) / replicates
-    return DataFrame(statistic = X, SE = sqrt(variance))
-end
-
-function bootweights(design::SurveyDesign; replicates = 100, rng = MersenneTwister(1234))
+function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwister(1234))
     H = length(unique(design.data[!, design.strata]))
     stratified = groupby(design.data, design.strata)
     function replicate(stratified, H)
@@ -68,7 +33,8 @@ function bootweights(design::SurveyDesign; replicates = 100, rng = MersenneTwist
                 return DataFrame(statistic = X, SE = 0)
             end
             nh = length(psus)
-            rh = [(count(==(i), rand(rng, 1:(nh-1), nh))) for i in 1:nh] # main bootstrap algo. 
+            randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  
+            rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
             for i in 1:nh
                 gdf[i].rh = repeat([rh[i]], nrow(gdf[i]))

From 667193132e0026e563e474218b117c91d6c2afca Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Sun, 1 Jan 2023 17:36:56 +0530
Subject: [PATCH 05/80] Replace other design with the new general design.

---
 src/Survey.jl        |   4 +-
 src/SurveyDesign.jl  | 364 ++++---------------------------------------
 src/boxplot.jl       |   2 +-
 src/by.jl            |  17 ++
 src/dimnames.jl      |  69 --------
 src/hist.jl          |   4 +-
 src/mean.jl          |  76 +++++++--
 src/plot.jl          |   2 +-
 src/quantile.jl      |   6 +-
 src/ratio.jl         |   8 +-
 src/show.jl          |  20 ---
 src/total.jl         | 188 +++++-----------------
 test/SurveyDesign.jl | 197 +----------------------
 test/bootstrap.jl    |  10 --
 test/boxplot.jl      |   2 +-
 test/dimnames.jl     |  16 --
 test/hist.jl         |   2 +-
 test/jackknife.jl    |   6 +-
 test/mean.jl         |  45 +++---
 test/plot.jl         |   2 +-
 test/quantile.jl     |  20 +--
 test/ratio.jl        |   2 +-
 test/runtests.jl     |   2 -
 test/sampling.jl     |   3 -
 test/total.jl        |  14 +-
 25 files changed, 190 insertions(+), 891 deletions(-)
 create mode 100644 src/by.jl
 delete mode 100644 src/dimnames.jl
 delete mode 100644 test/bootstrap.jl
 delete mode 100644 test/dimnames.jl
 delete mode 100644 test/sampling.jl

diff --git a/src/Survey.jl b/src/Survey.jl
index 8854e1d3..f6d3d030 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -21,10 +21,10 @@ include("total.jl")
 include("load_data.jl")
 include("hist.jl")
 include("plot.jl")
-include("dimnames.jl")
 include("boxplot.jl")
 include("show.jl")
 include("ratio.jl")
+include("by.jl")
 
 export load_data
 export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample
@@ -34,7 +34,7 @@ export mean, total, quantile
 export plot
 export hist
 export boxplot
-export Bootstrap
+export bootweights
 export jkknife
 export ratio
 
diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index acfafa50..1541a3b9 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -10,318 +10,6 @@ Supertype for every survey design type.
 """
 abstract type AbstractSurveyDesign end
 
-"""
-    SimpleRandomSample <: AbstractSurveyDesign
-
-
-Survey design sampled by simple random sampling.
-
-# Arguments:
-`data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`sampsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=UInt(nrow(data))`:  the survey sample size.
-`popsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=nothing`: the (expected) survey population size.
-`weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing`: the sampling weights.
-`probs::Union{Nothing,Symbol,Vector{<:Real}}=nothing: the sampling probabilities.
-`ignorefpc::Bool=false`: choose to ignore finite population correction and assume all weights equal to 1.0
-
-The precedence order of using `popsize`, `weights` and `probs` is `popsize` > `weights` > `probs`.
-E.g. If `popsize` is given then it is assumed to be the ground truth over `weights` or `probs`.
-
-If `popsize` is not given `weights` or `probs` must be given. `popsize` is then calculated
-using the weights and the sample size.
-
-```jldoctest
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs; popsize=:fpc)
-SimpleRandomSample:
-data: 200x42 DataFrame
-weights: 31.0, 31.0, 31.0, ..., 31.0
-probs: 0.0323, 0.0323, 0.0323, ..., 0.0323
-fpc: 6194, 6194, 6194, ..., 6194
-popsize: 6194
-sampsize: 200
-sampfraction: 0.0323
-ignorefpc: false
-```
-"""
-struct SimpleRandomSample <: AbstractSurveyDesign
-    data::AbstractDataFrame
-    sampsize::Union{Unsigned,Nothing}
-    popsize::Union{Unsigned,Nothing}
-    sampfraction::Float64
-    fpc::Float64
-    ignorefpc::Bool
-    function SimpleRandomSample(data::AbstractDataFrame;
-        popsize::Union{Nothing,Symbol,Unsigned,Vector{<:Real}}=nothing,
-        sampsize::Union{Nothing,Symbol,Unsigned,Vector{<:Real}}=nrow(data) |> UInt,
-        weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing,
-        probs::Union{Nothing,Symbol,Vector{<:Real}}=nothing,
-        ignorefpc::Bool=false
-    )
-        # If any of weights or probs given as Symbol,
-        # find the corresponding column in `data`
-        if isa(weights, Symbol)
-            weights = data[!, weights]
-        end
-        if isa(probs, Symbol)
-            probs = data[!, probs]
-        end
-        # If weights/probs vector not numeric/real, ie. string column passed for weights, then raise error
-        if !isa(weights, Union{Nothing,Vector{<:Real}})
-            error("weights should be Vector{<:Real}. You passed $(typeof(weights))")
-        elseif !isa(probs, Union{Nothing,Vector{<:Real}})
-            error("sampling probabilities should be Vector{<:Real}. You passed $(typeof(probs))")
-        end
-        # If popsize given as Symbol or Vector, check all records equal 
-        if isa(popsize, Symbol)
-            if !all(w -> w == first(data[!, popsize]), data[!, popsize])
-                error("popsize must be same for all observations in Simple Random Sample")
-            end
-            popsize = first(data[!, popsize]) |> UInt
-        elseif isa(popsize, Vector{<:Real})
-            if !all(w -> w == first(popsize), popsize)
-                error("popsize must be same for all observations in Simple Random Sample")
-            end
-            popsize = first(popsize) |> UInt
-        end
-        # If sampsize given as Symbol or Vector, check all records equal 
-        if isa(sampsize, Symbol)
-            if !all(w -> w == first(data[!, sampsize]), data[!, sampsize])
-                error("sampsize must be same for all observations in Simple Random Sample")
-            end
-            sampsize = first(data[!, sampsize]) |> UInt
-        elseif isa(sampsize, Vector{<:Real})
-            if !all(w -> w == first(sampsize), sampsize)
-                error("sampsize must be same for all observations in Simple Random Sample")
-            end
-            sampsize = first(sampsize) |> UInt
-        end
-        # If both `weights` and `probs` given, then `weights` is assumed to be ground truth for probs.
-        if !isnothing(weights) && !isnothing(probs)
-            probs = 1 ./ weights
-            data[!, :probs] = probs
-        end
-        # popsize must be nothing or <:Unsigned by now
-        if isnothing(popsize)
-            # If popsize not given, fallback to weights, probs and sampsize to estimate `popsize`
-            @warn "popsize not given. using weights/probs and sampsize to estimate `popsize`"
-            # Check that all weights (or probs if weights not given) are equal, as SRS is by definition equi-weighted
-            if typeof(weights) <: Vector{<:Real}
-                if !all(w -> w == first(weights), weights)
-                    error("all frequency weights must be equal for Simple Random Sample")
-                end
-            elseif typeof(probs) <: Vector{<:Real}
-                if !all(p -> p == first(probs), probs)
-                    error("all probability weights must be equal for Simple Random Sample")
-                end
-                weights = 1 ./ probs
-            else
-                error("either weights or probs must be given if `popsize` not given")
-            end
-            # Estimate population size
-            popsize = round(sampsize * first(weights)) |> UInt
-        elseif typeof(popsize) <: Unsigned
-            weights = fill(popsize / sampsize, nrow(data)) # If popsize is given, weights vector is made concordant with popsize and sampsize, regardless of given weights argument
-            probs = 1 ./ weights
-        else
-            error("something went wrong, please check validity of inputs.")
-        end
-        # If sampsize greater than popsize than illogical arguments specified.
-        if sampsize > popsize
-            error("population size was estimated to be less than given sampsize. Please check input arguments.")
-        end
-        # If ignorefpc then set weights to 1 ??
-        # TODO: This works under some cases, but should find better way to process ignoring fpc
-        if ignorefpc
-            @warn "assuming all weights are equal to 1.0"
-            weights = ones(nrow(data))
-            probs = 1 ./ weights
-        end
-        # sum of weights must equal to `popsize` for SRS
-        if !isnothing(weights) && !(isapprox(sum(weights), popsize; atol=1e-4))
-            if ignorefpc && !(isapprox(sum(weights), sampsize; atol=1e-4)) # Change if ignorefpc functionality changes
-                error("sum of sampling weights should be equal to `sampsize` for `SimpleRandomSample` with `ignorefpc`")
-            elseif !ignorefpc
-                error("sum of sampling weights must be equal to `popsize` for `SimpleRandomSample`")
-            end
-        end
-        # sum of probs must equal popsize for SRS
-        if !isnothing(probs) && !(isapprox(sum(1 ./ probs), popsize; atol=1e-4))
-            if ignorefpc && !(isapprox(sum(1 ./ probs), sampsize; atol=1e-4)) # Change if ignorefpc functionality changes
-                error("sum of inverse sampling probabilities should be equal to `sampsize` for `SimpleRandomSample` with `ignorefpc`")
-            elseif !ignorefpc
-                error("sum of inverse of sampling probabilities must be equal to `popsize` for Simple Random Sample")
-            end
-        end
-        ## Set remaining parts of data structure
-        # set sampling fraction
-        sampfraction = sampsize / popsize
-        # set fpc
-        fpc = ignorefpc ? 1 : 1 - (sampsize / popsize)
-        # add columns for frequency and probability weights to `data`
-        data[!, :weights] = weights
-        if isnothing(probs)
-            probs = 1 ./ data[!, :weights]
-        end
-        data[!, :probs] = probs
-        # Initialise the structure
-        new(data, sampsize, popsize, sampfraction, fpc, ignorefpc)
-    end
-end
-
-"""
-    StratifiedSample <: AbstractSurveyDesign
-
-Survey design sampled by stratification.
-
-`strata` must be specified as a Symbol name of a column in `data`.
-
-# Arguments:
-`data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`strata::Symbol`: the stratification variable - must be given as a column in `data`.
-`sampsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=UInt(nrow(data))`:  the survey sample size.
-`popsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=nothing`: the (expected) survey population size.
-`weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing`: the sampling weights.
-`probs::Union{Nothing,Symbol,Vector{<:Real}}=nothing: the sampling probabilities.
-`ignorefpc::Bool=false`: choose to ignore finite population correction and assume all weights equal to 1.0
-
-The `popsize`, `weights` and `probs` parameters follow the same rules as for [`SimpleRandomSample`](@ref).
-
-```jldoctest
-julia> apistrat = load_data("apistrat");
-
-julia> dstrat = StratifiedSample(apistrat, :stype; popsize=:fpc)
-StratifiedSample:
-data: 200x45 DataFrame
-strata: stype
-weights: 44.2, 44.2, 44.2, ..., 15.1
-probs: 0.0226, 0.0226, 0.0226, ..., 0.0662
-fpc: 0.977, 0.977, 0.977, ..., 0.934
-popsize: 4421, 4421, 4421, ..., 755
-sampsize: 100, 100, 100, ..., 50
-sampfraction: 0.0226, 0.0226, 0.0226, ..., 0.0662
-ignorefpc: false
-```
-"""
-struct StratifiedSample <: AbstractSurveyDesign
-    data::AbstractDataFrame
-    strata::Symbol
-    ignorefpc::Bool
-    function StratifiedSample(data::AbstractDataFrame, strata::Symbol;
-        popsize::Union{Nothing,Symbol}=nothing,
-        sampsize::Union{Nothing,Symbol}=nothing,
-        weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing,
-        probs::Union{Nothing,Symbol,Vector{<:Real}}=nothing,
-        ignorefpc::Bool=false
-    )
-        # Store the iterator over each strata, as used multiple times
-        data_groupedby_strata = groupby(data, strata)
-        # If any of weights or probs given as Symbol, find the corresponding column in `data`
-        if isa(weights, Symbol)
-            for each_strata in keys(data_groupedby_strata)
-                if !all(w -> w == first(data_groupedby_strata[each_strata][!, weights]), data_groupedby_strata[each_strata][!, weights])
-                    error("sampling weights within each strata must be equal in StratifiedSample")
-                end
-            end
-            # original_weights_colname = copy(weights)
-            weights = data[!, weights] # If all good with weights column, then store it as Vector
-        end
-        if isa(probs, Symbol)
-            for each_strata in keys(data_groupedby_strata)
-                if !all(p -> p == first(data_groupedby_strata[each_strata][!, probs]), data_groupedby_strata[each_strata][!, probs])
-                    error("sampling probabilities within each strata must be equal in StratifiedSample")
-                end
-            end
-            # original_probs_colname = copy(probs)
-            probs = data[!, probs] # If all good with probs column, then store it as Vector
-        end
-        # If weights/probs vector not numeric/real, ie. string column passed for weights, then raise error
-        if !isa(weights, Union{Nothing,Vector{<:Real}})
-            error("weights should be Vector{<:Real}. You passed $(typeof(weights))")
-        elseif !isa(probs, Union{Nothing,Vector{<:Real}})
-            error("sampling probabilities should be Vector{<:Real}. You passed $(typeof(probs))")
-        end
-        # If popsize given as Symbol or Vector, check all records equal in each strata
-        if isa(popsize, Symbol)
-            for each_strata in keys(data_groupedby_strata)
-                if !all(w -> w == first(data_groupedby_strata[each_strata][!, popsize]), data_groupedby_strata[each_strata][!, popsize])
-                    error("popsize must be same for all observations within each strata in StratifiedSample")
-                end
-            end
-            # original_popsize_colname = copy(popsize)
-            popsize = data[!, popsize]
-        end
-        # If sampsize given as Symbol or Vector, check all records equal 
-        if isa(sampsize, Symbol)
-            if isnothing(popsize) && isnothing(weights) && isnothing(probs)
-                error("if sampsize given, and popsize not given, then weights or probs must given to calculate popsize")
-            end
-            for each_strata in keys(data_groupedby_strata)
-                if !all(w -> w == first(data_groupedby_strata[each_strata][!, sampsize]), data_groupedby_strata[each_strata][!, sampsize])
-                    error("sampsize must be same for all observations within each strata in StratifiedSample")
-                end
-            end
-            # original_sampsize_colname = copy(sampsize)
-            sampsize = data[!, sampsize]
-            # If sampsize column not provided in constructor call, set it as nrow of strata
-        elseif isnothing(sampsize)
-            sampsize = transform(groupby(data, strata), nrow => :counts).counts
-        end
-        # If both `weights` and `probs` given, then `weights` is assumed to be ground truth for probs.
-        if !isnothing(weights) && !isnothing(probs)
-            probs = 1 ./ weights
-            data[!, :probs] = probs
-        end
-        # `popsize` is either nothing or a Vector{<:Real} by now
-        if isnothing(popsize)
-            # If popsize not given, fallback to weights, probs and sampsize to estimate `popsize`
-            @warn "popsize not given. using weights/probs and sampsize to estimate `popsize` for StratifiedSample"
-            # Check that all weights (or probs if weights not given) are equal, as SRS is by definition equi-weighted
-            if typeof(probs) <: Vector{<:Real}
-                weights = 1 ./ probs
-            elseif !(typeof(weights) <: Vector{<:Real})
-                error("either weights or probs must be given if `popsize` not given")
-            end
-            # Estimate population size
-            popsize = sampsize .* weights
-        elseif typeof(popsize) <: Vector{<:Real} # Still need to check if the provided Column is of <:Real
-            # If popsize is given, weights and probs made concordant with popsize and sampsize, regardless of supplied arguments
-            weights = popsize ./ sampsize
-            probs = 1 ./ weights
-        else
-            error("something went wrong. Please check validity of inputs.")
-        end
-        # If sampsize greater than popsize than illogical arguments specified.
-        if any(sampsize .> popsize)
-            error("population sizes were estimated to be less than sampsize. please check input arguments.")
-        end
-        # If ignorefpc then set weights to 1 ??
-        # TODO: This works under some cases, but should find better way to process ignoring fpc
-        if ignorefpc
-            @warn "assuming all weights are equal to 1.0"
-            weights = ones(nrow(data))
-            probs = 1 ./ weights
-        end
-        ## Set remaining parts of data structure
-        # set sampling fraction
-        sampfraction = sampsize ./ popsize
-        # set fpc
-        fpc = ignorefpc ? fill(1, size(data, 1)) : 1 .- (sampsize ./ popsize)
-        # add columns for frequency and probability weights to `data`
-        data[!, :weights] = weights
-        if isnothing(probs)
-            probs = 1 ./ data[!, :weights]
-        end
-        data[!, :probs] = probs
-        data[!, :sampsize] = sampsize
-        data[!, :popsize] = popsize
-        data[!, :fpc] = fpc
-        data[!, :sampfraction] = sampfraction
-        new(data, strata, ignorefpc)
-    end
-end
-
 """
     SurveyDesign <: AbstractSurveyDesign
 
@@ -343,37 +31,15 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
-SurveyDesign:
-data: 183x45 DataFrame
-clusters: dnum
-design.data[!,design.clusters]: 637, 637, 637, ..., 448
-popsize: fpc
-design.data[!,design.popsize]: 757, 757, 757, ..., 757
-sampsize: sampsize
-design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-weights: weights
-design.data[!,design.weights]: 50.5, 50.5, 50.5, ..., 50.5
-design.data[!,:strata]: 1.0, 1.0, 1.0, ..., 1.0
-design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-
-julia> apiclus1 = load_data("apiclus1");
-
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
-
-julia> dclus1 = SurveyDesign(apiclus1, :dnum; weights=:pw)
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
 SurveyDesign:
 data: 183x46 DataFrame
-clusters: dnum
-design.data[!,design.clusters]: 637, 637, 637, ..., 448
+cluster: dnum
+design.data[!,design.cluster]: 637, 637, 637, ..., 448
 popsize: popsize
-design.data[!,design.popsize]: 757.0, 757.0, 757.0, ..., 757.0
+design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
 sampsize: sampsize
 design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-weights: pw
-design.data[!,design.weights]: 50.5, 50.5, 50.5, ..., 50.5
-design.data[!,:strata]: 1.0, 1.0, 1.0, ..., 1.0
 design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
 design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
 ```
@@ -424,6 +90,28 @@ struct SurveyDesign <: AbstractSurveyDesign
     end
 end
 
+"""
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
+
+julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
+
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+
+julia> bclus1 = Survey.bootweights(dclus1; replicates = 1000)
+Survey.ReplicateDesign:
+data: 183x1046 DataFrame
+cluster: dnum
+design.data[!,design.cluster]: 637, 637, 637, ..., 448
+popsize: popsize
+design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
+sampsize: sampsize
+design.data[!,design.sampsize]: 15, 15, 15, ..., 15
+design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+replicates: 1000
+```
+"""
 struct ReplicateDesign <: AbstractSurveyDesign
     data::AbstractDataFrame
     cluster::Symbol
diff --git a/src/boxplot.jl b/src/boxplot.jl
index 46f6958a..8790f116 100644
--- a/src/boxplot.jl
+++ b/src/boxplot.jl
@@ -10,7 +10,7 @@ The keyword arguments are all the arguments that can be passed to `mapping` in
 
 ```@example boxplot
 apisrs = load_data("apisrs");
-srs = SimpleRandomSample(apisrs; weights = :pw);
+srs = SurveyDesign(apisrs; weights=:pw);
 bp = boxplot(srs, :stype, :enroll; weights = :pw)
 save("boxplot.png", bp); nothing # hide
 ```
diff --git a/src/by.jl b/src/by.jl
new file mode 100644
index 00000000..30cb2dd2
--- /dev/null
+++ b/src/by.jl
@@ -0,0 +1,17 @@
+function bydomain(x::Symbol, domain::Symbol, design::ReplicateDesign, func::Function)
+    gdf = groupby(design.data, domain)
+    nd = length(unique(design.data[!, domain]))
+    X = combine(gdf, [x, :weights] => ((a, b) -> func(a, weights(b))) => :statistic)
+    Xt_mat = Array{Float64, 2}(undef, (nd, design.replicates))
+    for i in 1:design.replicates
+        Xt_mat[:, i] = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> func(a, weights(b .* c))) => :statistic).statistic
+    end
+    ses = []
+    for i in 1:nd
+        filtered_dx = filter(!isnan, Xt_mat[i, :] .- X.statistic[i])
+        push!(ses, sqrt(sum(filtered_dx.^2) / length(filtered_dx)))
+    end
+    replace!(ses, NaN => 0)
+    X.SE = ses
+    return X
+end
\ No newline at end of file
diff --git a/src/dimnames.jl b/src/dimnames.jl
deleted file mode 100644
index 91bd473e..00000000
--- a/src/dimnames.jl
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-	dim(design)
-
-Get the dimensions of a `SurveyDesign`.
-
-```jldoctest
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs; popsize =:fpc);
-
-julia> dim(srs)
-(200, 42)
-```
-"""
-dim(design::AbstractSurveyDesign) = size(design.data)
-
-"""
-	colnames(design)
-
-Get the column names of a `SurveyDesign`.
-
-```jldoctest
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs; popsize=:fpc);
-
-julia> colnames(srs)
-42-element Vector{String}:
- "Column1"
- "cds"
- "stype"
- "name"
- "sname"
- "snum"
- "dname"
- "dnum"
- "cname"
- "cnum"
- ⋮
- "avg.ed"
- "full"
- "emer"
- "enroll"
- "api.stu"
- "pw"
- "fpc"
- "weights"
- "probs"
-```
-"""
-colnames(design::AbstractSurveyDesign) = names(design.data)
-
-"""
-	dimnames(design)
-
-Get the names of the rows and columns of a `SurveyDesign`.
-
-```jldoctest
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
-
-julia> dimnames(srs)
-2-element Vector{Vector{String}}:
- ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"  …  "191", "192", "193", "194", "195", "196", "197", "198", "199", "200"]
- ["Column1", "cds", "stype", "name", "sname", "snum", "dname", "dnum", "cname", "cnum"  …  "grad.sch", "avg.ed", "full", "emer", "enroll", "api.stu", "pw", "fpc", "weights", "probs"]
-```
-"""
-dimnames(design::AbstractSurveyDesign) = [string.(1:size(design.data, 1)), names(design.data)]
diff --git a/src/hist.jl b/src/hist.jl
index c140e59e..90d42d1b 100644
--- a/src/hist.jl
+++ b/src/hist.jl
@@ -11,7 +11,7 @@ Calculate the number of bins to use in a histogram using the Sturges rule.
 ```jldoctest
 julia> apisrs = load_data("apisrs");
 
-julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
+julia> srs = SurveyDesign(apisrs; weights=:pw);
 
 julia> sturges(srs, :enroll)
 9
@@ -31,7 +31,7 @@ Calculate the number of bins to use in a histogram using the Freedman-Diaconis r
 ```jldoctest
 julia> apisrs = load_data("apisrs");
 
-julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
+julia> srs = SurveyDesign(apisrs; weights=:pw);
 
 julia> freedman_diaconis(srs, :enroll)
 18
diff --git a/src/mean.jl b/src/mean.jl
index f19ee1f5..5dc679f3 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -22,22 +22,68 @@ function mean(x::Symbol, design::ReplicateDesign)
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(mean = X, SE = sqrt(variance))
 end
+"""
+```jldoctest
+julia> using Survey, Random, StatsBase; 
+
+julia> apiclus1 = load_data("apiclus1"); 
+
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
 
+julia> bclus1 = bootweights(dclus1; replicates = 1000);
 
+julia> mean(:api00, :cname, bclus1) |> print
+38×3 DataFrame
+ Row │ cname            statistic  SE          
+     │ String15         Float64    Any         
+─────┼─────────────────────────────────────────
+   1 │ Kern               573.6    44.5578
+   2 │ Los Angeles        658.156  22.2058
+   3 │ Orange             749.333  29.5701
+   4 │ San Luis Obispo    739.0    3.37273e-14
+   5 │ San Francisco      558.333  45.6266
+   6 │ Modoc              671.0    0.0
+   7 │ Alameda            676.091  37.3104
+   8 │ Solano             623.0    45.1222
+   9 │ Santa Cruz         624.333  113.43
+  10 │ Monterey           605.0    85.4116
+  11 │ San Bernardino     614.462  30.0066
+  12 │ Riverside          574.3    27.2025
+  13 │ Tulare             664.0    22.0097
+  14 │ San Diego          684.5    32.2241
+  15 │ Sacramento         616.0    39.7877
+  16 │ Marin              799.667  35.2397
+  17 │ Imperial           622.0    0.0
+  18 │ Ventura            743.8    31.7425
+  19 │ San Joaquin        608.667  40.8592
+  20 │ Sonoma             630.0    0.0
+  21 │ Fresno             600.25   56.9173
+  22 │ Santa Clara        718.286  58.562
+  23 │ Sutter             744.0    0.0
+  24 │ Contra Costa       766.111  53.598
+  25 │ Stanislaus         736.333  5.26576
+  26 │ Madera             480.0    3.5861
+  27 │ Placer             759.0    0.0
+  28 │ Lassen             752.0    0.0
+  29 │ Santa Barbara      728.667  25.8749
+  30 │ San Mateo          617.0    78.1173
+  31 │ Siskiyou           699.0    0.0
+  32 │ Kings              469.5    44.6284
+  33 │ Shasta             754.0    60.5829
+  34 │ Yolo               475.0    0.0
+  35 │ Calaveras          790.0    0.0
+  36 │ Napa               727.0    50.5542
+  37 │ Lake               804.0    0.0
+  38 │ Merced             595.0    0
+```
+"""
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
-    gdf = groupby(design.data, domain)
-    nd = length(unique(design.data[!, domain]))
-    X = combine(gdf, [x, :weights] => ((a, b) -> mean(a, weights(b))) => :mean)
-    Xt_mat = Array{Float64, 2}(undef, (nd, design.replicates))
-    for i in 1:design.replicates
-        Xt_mat[:, i] = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> mean(a, weights(b .* c))) => :mean).mean
-    end
-    ses = []
-    for i in 1:nd
-        filtered_dx = filter(!isnan, Xt_mat[i, :] .- X.mean[i])
-        push!(ses, sqrt(sum(filtered_dx.^2) / length(filtered_dx)))
-    end
-    X.SE = ses
-    # X.SE = sqrt.(sum((Xt_mat .- X.mean).^2 / design.replicates, dims = 2))[:,1]
-    return X
+    weighted_mean(x, w) = mean(x, StatsBase.weights(w))
+    bydomain(x, domain, design, weighted_mean)
+end
+
+function mean(x::Vector{Symbol}, design::ReplicateDesign)
+    df = reduce(vcat, [mean(i, design) for i in x])
+    insertcols!(df, 1, :names => String.(x))
+    return df
 end
\ No newline at end of file
diff --git a/src/plot.jl b/src/plot.jl
index cb9792d1..7dd4f555 100644
--- a/src/plot.jl
+++ b/src/plot.jl
@@ -8,7 +8,7 @@ in the design.
 
 ```@example plot
 apisrs = load_data("apisrs");
-srs = SimpleRandomSample(apisrs; weights = :pw);
+srs = SurveyDesign(apisrs; weights=:pw);
 s = plot(srs, :api99, :api00)
 save("scatter.png", s); nothing # hide
 ```
diff --git a/src/quantile.jl b/src/quantile.jl
index 8ee6000c..1cc9646c 100644
--- a/src/quantile.jl
+++ b/src/quantile.jl
@@ -13,7 +13,7 @@ The Julia, R and Python-numpy use the same defaults
 ```jldoctest
 julia> apisrs = load_data("apisrs");
 
-julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
+julia> srs = SurveyDesign(apisrs; weights=:pw);
 
 julia> quantile(:api00,srs,0.5)
 1×2 DataFrame
@@ -34,7 +34,7 @@ julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
    5 │        0.95    1473.1
 ```
 """
-function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vector{<:Real}}; 
+function quantile(var::Symbol, design::SurveyDesign, p::Union{<:Real,Vector{<:Real}}; 
     alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
     v = design.data[!, var]
     probs = design.data[!, :probs]
@@ -43,7 +43,7 @@ function quantile(var::Symbol, design::SimpleRandomSample, p::Union{<:Real,Vecto
     return df
 end
 
-function quantile(var::Symbol, design::StratifiedSample, p::Union{<:Real,Vector{<:Real}}; 
+function quantile(var::Symbol, design::SurveyDesign, p::Union{<:Real,Vector{<:Real}}; 
     alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
     v = design.data[!, var]
     probs = design.data[!, :probs]
diff --git a/src/ratio.jl b/src/ratio.jl
index 8e923d42..67e51668 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -9,7 +9,7 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc);
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
 
 julia> ratio(:api00, :enroll, dclus1)
 1×2 DataFrame
@@ -35,8 +35,4 @@ function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesig
     end
     var = c*(nh-1)/nh
     return DataFrame(Statistic = statistic, SE = sqrt(var))
-end
-
-# function ratio(x::Symbol, design::ReplicateDesign)
-#     design.data[!, "ones"] = ones(nrow(design.data))
-# end
\ No newline at end of file
+end
\ No newline at end of file
diff --git a/src/show.jl b/src/show.jl
index 4adb61d8..3319e653 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -33,20 +33,6 @@ function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
     printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
 end
 
-function Base.show(io::IO, ::MIME"text/plain", design::StratifiedSample)
-    type = typeof(design)
-    printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "strata", string(design.strata); newline=true)
-    printinfo(io, "weights", makeshort(design.data.weights))
-    printinfo(io, "probs", makeshort(design.data.probs))
-    printinfo(io, "fpc", makeshort(design.data.fpc))
-    printinfo(io, "popsize", makeshort(design.data.popsize))
-    printinfo(io, "sampsize", makeshort(design.data.sampsize))
-    printinfo(io, "sampfraction", makeshort(design.data.sampfraction))
-    printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
-end
 
 "Print information about a survey design."
 function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
@@ -60,9 +46,6 @@ function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
     printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
     printinfo(io, "sampsize", string(design.sampsize); newline=true)
     printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    # printinfo(io, "weights", string(design.weights); newline=true)
-    # printinfo(io, "design.data[!,design.weights]", makeshort(design.data[!,design.weights]))
-    # printinfo(io, "design.data[!,:strata]", makeshort(design.data[!,:strata]))
     printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
     printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
 end
@@ -79,9 +62,6 @@ function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
     printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
     printinfo(io, "sampsize", string(design.sampsize); newline=true)
     printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    # printinfo(io, "weights", string(design.weights); newline=true)
-    # printinfo(io, "design.data[!,design.weights]", makeshort(design.data[!,design.weights]))
-    # printinfo(io, "design.data[!,:strata]", makeshort(design.data[!,:strata]))
     printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
     printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
     printstyled(io, "replicates: "; bold=true)
diff --git a/src/total.jl b/src/total.jl
index f9824255..d298d62d 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -1,173 +1,55 @@
 """
-    total(x, design)
-
-Estimate the population total for the variable specified by `x`.
-
-For SurveyDesign, formula adapted from Sarndal pg129, section 4.2.2 Simple Random Cluster Sampling
-
 ```jldoctest
-julia> using Survey;
-
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs; popsize=:fpc);
-
-julia> total(:enroll, srs)
-1×2 DataFrame
- Row │ total      SE 
-     │ Float64    Float64  
-─────┼─────────────────────
-   1 │ 3.62107e6  1.6952e5
-
-julia> strat = load_data("apistrat");
-
-julia> dstrat = StratifiedSample(strat, :stype; popsize=:fpc);
-
-julia> total(:api00, dstrat)
-1×2 DataFrame
- Row │ total      SE      
-     │ Float64    Float64 
-─────┼────────────────────
-   1 │ 4.10221e6  58279.0
-
-julia> total([:api00, :enroll], dstrat)
-2×3 DataFrame
- Row │ names   total      SE            
-     │ String  Float64    Float64       
-─────┼──────────────────────────────────
-   1 │ api00   4.10221e6  58279.0
-   2 │ enroll  3.68718e6      1.14642e5
-```
-"""
-function total(x::Symbol, design::SimpleRandomSample)
-    function se(x::Symbol, design::SimpleRandomSample)
-        function variance(x::Symbol, design::SimpleRandomSample)
-            return design.popsize^2 * design.fpc * var(design.data[!, x]) / design.sampsize
-        end
-        return sqrt(variance(x, design))
-    end
-    if isa(design.data[!, x], CategoricalArray)
-        gdf = groupby(design.data, x)
-        p = combine(gdf, nrow => :count)
-        p.total = design.popsize .* p.count ./ sum(p.count)
-        p.proportion = p.total ./ design.popsize
-        p = select!(p, Not(:count)) # count column is not necessary for `total`
-        p.var = design.popsize^2 .* design.fpc .* p.proportion .*
-                (1 .- p.proportion) ./ (design.sampsize - 1) # N^2 .* variance of proportion
-        p.SE = sqrt.(p.var)
-        return select(p, Not([:proportion, :var]))
-    end
-    m = mean(x,design)
-    total = design.popsize * m.mean[1]
-    return DataFrame(total=total, SE=se(x, design))
-end
-
-function total(x::Symbol, design::StratifiedSample)
-    # TODO: check if statement
-    if x == design.strata
-        gdf = groupby(design.data, x)
-        return combine(gdf, :weights => sum => :Nₕ)
-    end
-    gdf = groupby(design.data, design.strata)
-    grand_total = sum(combine(gdf, [x, :weights] => ((a, b) -> wsum(a, b)) => :total).total)
-    # variance estimation using closed-form formula
-    Nₕ = combine(gdf, :weights => sum => :Nₕ).Nₕ
-    nₕ = combine(gdf, nrow => :nₕ).nₕ
-    fₕ = nₕ ./ Nₕ
-
-    s²ₕ = combine(gdf, x => var => :s²h).s²h
-    # the only difference between total and mean variance is the Nₕ instead of Wₕ
-    V̂Ȳ̂ = sum((Nₕ .^ 2) .* (1 .- fₕ) .* s²ₕ ./ nₕ)
-    SE = sqrt(V̂Ȳ̂)
-    return DataFrame(total=grand_total, SE=SE)
-end
-
-function total(x::Vector{Symbol}, design::AbstractSurveyDesign)
-    df = reduce(vcat, [total(i, design) for i in x])
-    insertcols!(df, 1, :names => String.(x))
-    return df
-end
-
-"""
-```jldoctest
-julia> using Survey
+julia> using Survey; 
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+
+julia> bclus1 = bootweights(dclus1; replicates = 1000); 
 
-julia> total(:api00, dclus1)
+julia> total(:api00, bclus1)
 1×2 DataFrame
- Row │ total      SE        
+ Row │ total      SE        
      │ Float64    Float64   
 ─────┼──────────────────────
-   1 │ 5.94916e6  1.33948e6
-```
-"""
-function total(x::Symbol, design::SurveyDesign)
-    gdf = groupby(design.data, design.cluster)
-    ŷₜ = combine(gdf, x => sum => :sum).sum
-    Nₜ = first(design.data[!,design.popsize])
-    Ȳ = Nₜ * mean(ŷₜ)
-    nₜ = first(design.data[!,design.sampsize])
-    s²ₜ = var(ŷₜ)
-    VȲ = Nₜ^2 * (1 - nₜ/Nₜ) * s²ₜ / nₜ
-    return DataFrame(total = Ȳ, SE = sqrt(VȲ))
-end
-
-"""
-    total(x, by, design)
-
-Estimate the subpopulation total of a variable `x`.
-
-```jldoctest
-julia> using  Survey;
-
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SimpleRandomSample(apisrs;popsize=:fpc);
-
-julia> total(:api00, :cname, srs) |> first
-DataFrameRow
- Row │ cname     total      SE     
-     │ String15  Float64    Float64 
-─────┼──────────────────────────────
-   1 │ Kern      1.77644e5  55600.8
-
+   1 │ 5.94916e6  2.01705e6
 ```
 """
-function total(x::Symbol, by::Symbol, design::SimpleRandomSample)
-    function domain_total(x::AbstractVector, design::SimpleRandomSample, weights)
-        function se(x::AbstractVector, design::SimpleRandomSample)
-            # vector of length equal to `sampsize` containing `x` and zeros
-            z = cat(zeros(design.sampsize - length(x)), x; dims=1)
-            variance = design.popsize^2 / design.sampsize * design.fpc * var(z)
-            return sqrt(variance)
-        end
-        total = wsum(x, weights)
-        return DataFrame(total=total, SE=se(x, design::SimpleRandomSample))
-    end
-    gdf = groupby(design.data, by)
-    combine(gdf, [x, :weights] => ((a, b) -> domain_total(a, design, b)) => AsTable)
+function total(x::Symbol, design::ReplicateDesign)
+    X = wsum(design.data[!, x], weights(design.data.weights))
+    Xt = [wsum(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
+    variance = sum((Xt .- X).^2) / design.replicates
+    DataFrame(total = X, SE = sqrt(variance))
 end
-
 """
 ```jldoctest
-julia> using Survey, Random, StatsBase; 
+julia> using Survey; 
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
-
-julia> total(:api00, dclus1, Bootstrap(replicates = 1000, rng = MersenneTwister(111)))
-1×2 DataFrame
- Row │ total      SE        
-     │ Float64    Float64   
-─────┼──────────────────────
-   1 │ 5.94916e6  1.36593e6
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+
+julia> bclus1 = bootweights(dclus1; replicates = 1000); 
+
+julia> total(:api00, :cname, bclus1) |> print
+11×3 DataFrame
+ Row │ cname        statistic      SE        
+     │ String15     Float64        Any       
+─────┼───────────────────────────────────────
+   1 │ Alameda          3.71384e5  3.78375e5
+   2 │ Fresno       95281.1        96134.8
+   3 │ Kern         45672.3        43544.7
+   4 │ Los Angeles      4.89981e5  4.42865e5
+   5 │ Mendocino        1.25813e5  1.22757e5
+   6 │ Merced           1.04819e5  1.09032e5
+   7 │ Orange           5.73756e5  6.01213e5
+   8 │ Plumas           3.2228e5   3.26443e5
+   9 │ San Diego        1.83038e6  1.34155e6
+  10 │ San Joaquin      1.02922e6  1.04048e6
+  11 │ Santa Clara      9.60583e5  643492.0
 ```
 """
-function total(x::Symbol, design::SurveyDesign, method::Bootstrap)
-    df = bootstrap(x, design, wsum; method.replicates, method.rng)
-    df = rename(df, :statistic => :total)
+function total(x::Symbol, domain::Symbol, design::ReplicateDesign)
+    bydomain(x, domain, design, wsum)
 end
\ No newline at end of file
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index 0f3bc796..6851f4e8 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -1,170 +1,3 @@
-# Work on copies, keep original
-@testset "SimpleRandomSample" begin
-    ##### SimpleRandomSample tests
-    # Load API datasets
-    apisrs_original = load_data("apisrs")
-    apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw
-    apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1))
-    ##############################
-    ### Valid type checking tests
-    apisrs = copy(apisrs_original)
-    @test_throws TypeError SimpleRandomSample(apisrs, popsize=-2.83, ignorefpc=true)
-    @test_throws TypeError SimpleRandomSample(apisrs, sampsize=-300)
-    @test_throws TypeError SimpleRandomSample(apisrs, sampsize=-2.8, ignorefpc=true)
-    @test_throws TypeError SimpleRandomSample(apisrs, weights=50)
-    @test_throws TypeError SimpleRandomSample(apisrs, probs=1)
-    ##############################
-    ### weights or probs as Symbol
-    apisrs = copy(apisrs_original)
-    srs_weights = SimpleRandomSample(apisrs; weights=:pw)
-    @test srs_weights.data.weights[1] ≈ 30.97 atol = 1e-4
-    @test srs_weights.data.weights == 1 ./ srs_weights.data.probs
-    ### probs as Symbol
-    apisrs = copy(apisrs_original)
-    srs_probs_sym = SimpleRandomSample(apisrs; probs=:derived_probs)
-    @test srs_probs_sym.data.probs[1] ≈ 0.032289 atol = 1e-4
-    @test srs_probs_sym.data.probs == 1 ./ srs_probs_sym.data.weights
-    ##############################
-    ### Weights or probs as non-numeric error
-    apisrs = copy(apisrs_original)
-    @test_throws ErrorException SimpleRandomSample(apisrs, weights=:stype)
-    @test_throws ErrorException SimpleRandomSample(apisrs, probs=:cname)
-    ##############################
-    ### popsize given as Symbol
-    apisrs = copy(apisrs_original)
-    srs_popsize_sym = SimpleRandomSample(apisrs; popsize=:fpc)
-    @test srs_popsize_sym.data.weights == 1 ./ srs_popsize_sym.data.probs # weights should be inverse of probs
-    @test srs_popsize_sym.sampsize > 0
-    ### popsize given as Vector
-    apisrs = copy(apisrs_original)
-    srs_popsize_vec = SimpleRandomSample(apisrs; popsize=apisrs.fpc)
-    @test srs_popsize_vec.data.weights == 1 ./ srs_popsize_vec.data.probs # weights should be inverse of probs
-    @test srs_popsize_vec.sampsize > 0
-    ##############################
-    ### sampsize given as Symbol
-    apisrs = copy(apisrs_original)
-    srs_sampsize_sym = SimpleRandomSample(apisrs; sampsize=:derived_sampsize, weights=:pw)
-    @test srs_sampsize_sym.data.weights == 1 ./ srs_sampsize_sym.data.probs # weights should be inverse of probs
-    @test srs_sampsize_sym.sampsize > 0
-    ### sampsize given as Vector
-    apisrs = copy(apisrs_original)
-    srs_sampsize_vec = SimpleRandomSample(apisrs; sampsize=apisrs.derived_sampsize, probs=:derived_probs)
-    @test srs_sampsize_vec.data.weights == 1 ./ srs_sampsize_vec.data.probs # weights should be inverse of probs
-    @test srs_sampsize_vec.sampsize > 0
-    ##############################
-    ### both weights and probs given
-    # If weights given, probs is superfluous
-    apisrs = copy(apisrs_original)
-    srs_weights_probs = SimpleRandomSample(apisrs; weights=:pw, probs=:derived_probs)
-    srs_weights_probs = SimpleRandomSample(apisrs; weights=:pw, probs=:pw)
-    ##############################
-    ### sum of weights and probs condition check
-    apisrs = copy(apisrs_original)
-    @test_throws ErrorException SimpleRandomSample(apisrs, weights=fill(0.3, size(apisrs_original, 1)))
-    apisrs = copy(apisrs_original)
-    @test_throws ErrorException SimpleRandomSample(apisrs, probs=fill(0.3, size(apisrs_original, 1)))
-    ##############################
-    ### weights only as Vector
-    apisrs = copy(apisrs_original)
-    srs_weights = SimpleRandomSample(apisrs; weights=apisrs.pw)
-    @test srs_weights.data.weights[1] == 30.97
-    @test srs_weights.data.weights == 1 ./ srs_weights.data.probs
-    ### probs only as Vector
-    apisrs = copy(apisrs_original)
-    srs_freq = SimpleRandomSample(apisrs; probs=apisrs.derived_probs)
-    @test srs_freq.data.weights[1] == 30.97
-    @test srs_freq.data.weights == 1 ./ srs_freq.data.probs
-    ##############################
-    ### ignorefpc tests. TODO: change if ignorefpc functionality changed
-    apisrs = copy(apisrs_original)
-    srs_ignorefpc = SimpleRandomSample(apisrs; popsize=:fpc, ignorefpc=true)
-    @test srs_ignorefpc.data.weights == 1 ./ srs_ignorefpc.data.probs # weights should be inverse of probs
-    @test srs_ignorefpc.sampsize > 0
-    ### incorrect probs with correct popsize, ignorefpc = true
-    apisrs = copy(apisrs_original)
-    srs_w_p = SimpleRandomSample(apisrs, popsize=:fpc, probs=fill(0.3, size(apisrs_original, 1)), ignorefpc=true)
-    @test srs_w_p.data.probs == 1 ./ srs_w_p.data.weights
-    ### ingorefpc = true with probs given
-    apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs, ignorefpc=true, probs=:derived_probs)
-    @test srs.data.probs == 1 ./ srs.data.weights
-    ##############################
-    ### probs as vector declared on-the-fly
-    apisrs = copy(apisrs_original)
-    srs_prob = SimpleRandomSample(apisrs; probs=1 ./ apisrs.pw)
-    @test srs_prob.data.weights[1] == 30.97
-    @test srs_prob.data.weights == 1 ./ srs_prob.data.probs
-end
-
-@testset "StratifiedSample" begin
-    ### StratifiedSample tests
-    # Load API datasets
-    apistrat_original = load_data("apistrat")
-    apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw
-    apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
-    ##############################
-    ### Valid type checking tests
-    apistrat = copy(apistrat_original)
-    @test_throws TypeError StratifiedSample(apistrat,:stype; popsize=-2.83, ignorefpc=true)
-    @test_throws TypeError StratifiedSample(apistrat,:stype; sampsize=-300)
-    @test_throws TypeError StratifiedSample(apistrat,:stype; sampsize=-2.8, ignorefpc=true)
-    @test_throws TypeError StratifiedSample(apistrat,:stype; weights=50)
-    @test_throws TypeError StratifiedSample(apistrat,:stype; probs=1)
-    ##############################
-    ### weights as Symbol
-    apistrat = copy(apistrat_original)
-    strat_wt = StratifiedSample(apistrat, :stype; weights=:pw)
-    @test strat_wt.data.probs == 1 ./ strat_wt.data.weights
-    ### probs as Symbol
-    apistrat = copy(apistrat_original)
-    strat_probs = StratifiedSample(apistrat, :stype; probs=:derived_probs)
-    @test strat_probs.data.probs == 1 ./ strat_probs.data.weights
-    ### weights as Vector{<:Real}
-    apistrat = copy(apistrat_original)
-    strat_wt = StratifiedSample(apistrat, :stype; weights=apistrat.pw)
-    @test strat_wt.data.probs == 1 ./ strat_wt.data.weights
-    ### probs as Vector{<:Real}
-    apistrat = copy(apistrat_original)
-    strat_probs = StratifiedSample(apistrat, :stype; probs=apistrat.derived_probs)
-    @test strat_probs.data.probs == 1 ./ strat_probs.data.weights
-    ##############################
-    ### popsize as Symbol
-    apistrat = copy(apistrat_original)
-    strat_pop = StratifiedSample(apistrat, :stype; popsize=:fpc)
-    @test strat_pop.data.probs == 1 ./ strat_pop.data.weights
-    ### popsize given as Vector (should give error for now, not implemented Vector input directly for popsize)
-    apistrat = copy(apistrat_original)
-    @test_throws TypeError StratifiedSample(apistrat,:stype; popsize=apistrat.fpc)
-    ##############################
-    ### sampsize given as Symbol
-    apistrat = copy(apistrat_original)
-    strat_sampsize_sym = StratifiedSample(apistrat,:stype; sampsize=:derived_sampsize, weights=:pw)
-    @test strat_sampsize_sym.data.weights == 1 ./ strat_sampsize_sym.data.probs # weights should be inverse of probs
-    ### sampsize given as symbol without weights or probs, and popsize not given - raise error
-    apistrat = copy(apistrat_original)
-    @test_throws ErrorException StratifiedSample(apistrat,:stype; sampsize=:derived_sampsize)
-    ##############################
-    ### both weights and probs given
-    # If weights given, probs is superfluous
-    apistrat = copy(apistrat_original)
-    strat_weights_probs = StratifiedSample(apistrat,:stype; weights=:pw, probs=:derived_probs)
-    strat_weights_probs = StratifiedSample(apistrat,:stype; weights=:pw, probs=:pw)
-    ##############################
-    ### ignorefpc test (Modify if ignorefpc changed)
-    apistrat = copy(apistrat_original)
-    strat_ignorefpc=StratifiedSample(apistrat,:stype; popsize=:fpc, ignorefpc=true)
-    @test strat_ignorefpc.data.probs == 1 ./ strat_ignorefpc.data.weights
-    ##############################
-    # For now, no sum checks on probs and weights for StratifiedSample (unlike SRS)
-    apistrat = copy(apistrat_original)
-    strat_probs1 = StratifiedSample(apistrat, :stype; probs=fill(0.3, size(apistrat, 1)))
-    @test strat_probs1.data.probs == 1 ./ strat_probs1.data.weights
-    ##############################
-    #should throw error because sampsize > popsize
-    apistrat = copy(apistrat_original)
-    @test_throws ErrorException StratifiedSample(apistrat, :stype; popsize= :pw, sampsize=:fpc) 
-end
-
 @testset "SurveyDesign" begin
     # Load API datasets
     apiclus1_original = load_data("apiclus1")
@@ -172,32 +5,8 @@ end
     ##############################
     # one-stage cluster sample with popsize
     apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
-    @test dclus1.data[!,dclus1.weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
-    @test dclus1.data[!,dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
-    @test dclus1.data[!,:allprobs] ≈ dclus1.data[!,:probs] atol = 1e-4
-
-    ##############################
-    # one-stage cluster sample with weights
-    apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, :dnum; weights=:pw)
-    @test dclus1.data[!,dclus1.weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
+    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize =:fpc)
+    @test dclus1.data[!, :weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
     @test dclus1.data[!,dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
     @test dclus1.data[!,:allprobs] ≈ dclus1.data[!,:probs] atol = 1e-4
-end
-
-# @testset "ClusterSample" begin
-#     # # Load API datasets
-#     # apiclus1_original = load_data("apiclus1")
-#     # apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
-#     # apiclus2_original = load_data("apiclus2")
-#     ##############################
-#     ### TODO when they are implemented
-#     # one-stage cluster sample
-#     # apiclus1 = copy(apiclus1_original)
-#     # dclus2 = ClusterSample(apiclus1, :dnum, :fpc)
-#     # # two-stage cluster sample
-#     # dclus2 = ClusterSample(apiclus2, [:dnum,:snum], [:fpc1,:fpc2])
-#     # # two-stage `with replacement'
-#     # dclus2wr = ClusterSample(apiclus2, [:dnum,:snum]; weights=:pw)
-# end
\ No newline at end of file
+end
\ No newline at end of file
diff --git a/test/bootstrap.jl b/test/bootstrap.jl
deleted file mode 100644
index e49c1b3f..00000000
--- a/test/bootstrap.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-using Random, StatsBase
-apiclus1 = load_data("apiclus1")
-dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
-rng = MersenneTwister(111); 
-func = wsum; 
-est = Survey.bootstrap(:api00, dclus1, func; replicates=1000, rng)
-@testset "bootstrap.jl" begin
-    @test est.SE[1] ≈ 1.365925776009e6
-    @test est.statistic[1] ≈ 5.9491620666e6
-end
\ No newline at end of file
diff --git a/test/boxplot.jl b/test/boxplot.jl
index f40dd87f..563fec48 100644
--- a/test/boxplot.jl
+++ b/test/boxplot.jl
@@ -1,7 +1,7 @@
 @testset "boxplot.jl" begin
     # SimpleRandomSample
     apisrs = load_data("apisrs")
-    srs = SimpleRandomSample(apisrs,popsize = apisrs.fpc)
+    srs = srs = SurveyDesign(apisrs; weights=:pw);
     bp = boxplot(srs, :stype, :enroll; weights = :pw)
 
     @test bp.grid[1].entries[1].positional[2] == srs.data[!, :enroll]
diff --git a/test/dimnames.jl b/test/dimnames.jl
deleted file mode 100644
index 6241bd64..00000000
--- a/test/dimnames.jl
+++ /dev/null
@@ -1,16 +0,0 @@
-@testset "dimnames.jl" begin
-    # Simple random sampling tests
-    apisrs = load_data("apisrs")
-    # make a copy to not modify the original dataset
-    apisrs_copy = copy(apisrs)
-    srs = SimpleRandomSample(apisrs_copy,popsize=:fpc,ignorefpc = true)
-    # `dim`
-    @test dim(srs)[2] == 42
-    # `colnames`
-    @test length(colnames(srs)) == dim(srs)[2]
-    # `dimnames`
-    @test length(dimnames(srs)[1]) == parse(Int, last(dimnames(srs)[1]))
-    @test dimnames(srs)[2] == colnames(srs)
-
-    # Stratified sampling tests
-end
diff --git a/test/hist.jl b/test/hist.jl
index 64e260b9..f48b6d70 100644
--- a/test/hist.jl
+++ b/test/hist.jl
@@ -4,7 +4,7 @@
 
     # SimpleRandomSample
     apisrs = load_data("apisrs")
-    srs = SimpleRandomSample(apisrs,popsize=:fpc)
+    srs = srs = SurveyDesign(apisrs; weights=:pw);
 
     h = hist(srs, :enroll)
     @test h.grid[1].entries[1].positional[2] |> length == 21
diff --git a/test/jackknife.jl b/test/jackknife.jl
index 73e90f78..25e35e91 100644
--- a/test/jackknife.jl
+++ b/test/jackknife.jl
@@ -1,10 +1,8 @@
 @testset "jackknife.jl" begin
     apiclus1_original = load_data("apiclus1")
     apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
-    ##############################
-    # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
-    @test jkknife(:api00,dclus1, mean).SE[1] ≈ 26.5997 atol = 1e-4
+    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights=:pw);
+    @test jkknife(:api00, dclus1, mean).SE[1] ≈ 26.5997 atol = 1e-4
     @test jkknife(:api00, dclus1, mean).Statistic[1] ≈ 644.1693 atol = 1e-4
 end
diff --git a/test/mean.jl b/test/mean.jl
index d2fbd8a2..6e8bea18 100644
--- a/test/mean.jl
+++ b/test/mean.jl
@@ -7,34 +7,26 @@
     ##############################
     ### Basic functionality
     apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs, popsize = :fpc)
-    @test mean(:api00, srs).mean[1] ≈ 656.585 atol = 1e-4
-    @test mean(:api00, srs).SE[1] ≈ 9.249722039282807 atol = 1e-4
-    @test mean(:enroll, srs).mean[1] ≈ 584.61 atol = 1e-4
-    @test mean(:enroll, srs).SE[1] ≈ 27.36836524766856 atol = 1e-4
-    # ignorefpc = true
-    apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs, popsize=:fpc,ignorefpc = true)
+    srs = SurveyDesign(apisrs, weights = :pw) |> bootweights 
+
     @test mean(:api00, srs).mean[1] ≈ 656.585 atol = 1e-4
-    @test mean(:api00, srs).SE[1] ≈ 9.402772170880636 atol = 1e-4
+    @test mean(:api00, srs).SE[1] ≈ 9.402772170880636 atol = 1e-1
     @test mean(:enroll, srs).mean[1] ≈ 584.61 atol = 1e-4
-    @test mean(:enroll, srs).SE[1] ≈ 27.821214737089324 atol = 1e-4
+    @test mean(:enroll, srs).SE[1] ≈ 27.821214737089324 atol = 1
     ##############################
     ### Vector of Symbols
-    apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs, popsize = :fpc)
     mean_vec_sym = mean([:api00,:enroll], srs)
     @test mean_vec_sym.mean[1] ≈ 656.585 atol = 1e-4
-    @test mean_vec_sym.SE[1] ≈ 9.249722039282807 atol = 1e-4
+    @test mean_vec_sym.SE[1] ≈ 9.49199 atol = 1e-2
     @test mean_vec_sym.mean[2] ≈ 584.61 atol = 1e-4
-    @test mean_vec_sym.SE[2] ≈ 27.36836524766856 atol = 1e-4
+    @test mean_vec_sym.SE[2] ≈ 27.9994 atol = 1e-2
     ##############################
     ### Categorical Array - estimating proportions
-    apisrs_categ = copy(apisrs_original)
-    apisrs_categ.stype = CategoricalArray(apisrs_categ.stype) # Convert a column to CategoricalArray
-    srs_design_categ = SimpleRandomSample(apisrs_categ, popsize = :fpc)
+    # apisrs_categ = copy(apisrs_original)
+    # apisrs_categ.stype = CategoricalArray(apisrs_categ.stype) # Convert a column to CategoricalArray
+    # srs_design_categ = SurveyDesign(apisrs_categ, weights = :pw)
     #>>>>>>>>> complete this suite
-    mean_categ = mean(:stype,srs_design_categ)
+    # mean_categ = mean(:stype,srs_design_categ)
     # complete this 
 end
 
@@ -63,14 +55,14 @@ end
 @testset "mean_svyby_Stratified" begin
     apistrat_original = load_data("apistrat")
     apistrat = copy(apistrat_original)
-    strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
-    mean_strat_symb = mean(:api00,:stype, strat)
+    strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
+    mean_strat_symb = mean(:api00, :stype, strat)
     @test mean_strat_symb.mean[1] ≈ 674.43 atol = 1e-2
     @test mean_strat_symb.mean[2] ≈ 636.6 atol = 1e-2
     @test mean_strat_symb.mean[3] ≈ 625.82 atol = 1e-2
-    @test mean_strat_symb.SE[1] ≈ 12.3825 atol = 1e-2
-    @test mean_strat_symb.SE[2] ≈ 16.2147 atol = 1e-2
-    @test mean_strat_symb.SE[3] ≈ 14.9371 atol = 1e-2
+    @test mean_strat_symb.SE[1] ≈ 12.6528 atol = 1e-2
+    @test mean_strat_symb.SE[2] ≈ 16.3125 atol = 1e-2
+    @test mean_strat_symb.SE[3] ≈ 15.3952 atol = 1e-2
 end
 
 @testset "mean_OneStageCluster" begin
@@ -80,8 +72,7 @@ end
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
-
-    @test mean(:api00,dclus1, Bootstrap()).mean[1] ≈ 644.17 atol = 1
-    @test mean(:api00,dclus1, Bootstrap(replicates = 10000)).SE[1] ≈ 23.779 atol = 0.5 # without fpc as it hasn't been figured out for bootstrap. 
+    dclus1 = SurveyDesign(apiclus1; clusters =  :dnum, weights = :pw) |> bootweights 
+    @test mean(:api00, dclus1).mean[1] ≈ 644.17 atol = 1e-2
+    @test mean(:api00, dclus1).SE[1] ≈  22.9042 atol = 1e-2 # without fpc as it hasn't been figured out for bootstrap. 
 end
diff --git a/test/plot.jl b/test/plot.jl
index e9e59c36..c2476f65 100644
--- a/test/plot.jl
+++ b/test/plot.jl
@@ -1,7 +1,7 @@
 @testset "plot.jl" begin
     # SimpleRandomSample
     apisrs = load_data("apisrs")
-    srs = SimpleRandomSample(apisrs,popsize=:fpc)
+    srs = SurveyDesign(apisrs, weights=:pw)
     s = plot(srs, :api99, :api00)
     @test s.grid[1].entries[1].named[:markersize] == srs.data.weights
     @test s.grid[1].entries[1].positional[1] == srs.data.api99
diff --git a/test/quantile.jl b/test/quantile.jl
index cab18fdb..59bb0a69 100644
--- a/test/quantile.jl
+++ b/test/quantile.jl
@@ -6,10 +6,10 @@
     apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1))
     ##############################
     apisrs = copy(apisrs_original)
-    srs_design = SimpleRandomSample(apisrs,popsize=:fpc)
-    @test quantile(:api00,srs_design,0.5)[!,2][1] ≈ 659.0 atol=1e-4
-    @test quantile(:api00,srs_design,[0.1753,0.25,0.5,0.75,0.975])[!,2] ≈ [512.8847,544,659,752.5,905] atol = 1e-4
-    @test quantile(:enroll,srs_design,[0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [245.5,317.6,453.0,668.5,1473.1] atol = 1e-4 
+    srs_design = SurveyDesign(apisrs; weights=:pw) 
+    @test quantile(:api00, srs_design, 0.5)[!,2][1] ≈ 659.0 atol=1e-4
+    @test quantile(:api00, srs_design, [0.1753,0.25,0.5,0.75,0.975])[!,2] ≈ [512.8847,544,659,752.5,905] atol = 1e-4
+    @test quantile(:enroll,srs_design, [0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [245.5,317.6,453.0,668.5,1473.1] atol = 1e-4 
 end
 
 @testset "quantile_Stratified" begin
@@ -20,15 +20,7 @@ end
     apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
     # base functionality
     apistrat = copy(apistrat_original)
-    dstrat = StratifiedSample(apistrat, :stype; popsize = :fpc)
+    dstrat = SurveyDesign(apistrat; strata = :stype, popsize = :fpc)
     # Check which definition of quantile for StratifiedSample
-    # @test quantile(:enroll,dstrat,[0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [262,309.3366,446.4103,658.8764,1589.7881] atol = 1e-4 
-end
-
-@testset "quantile_by_SimpleRandomSample" begin
-    ## Add tests
-end
-
-@testset "quantile_by_Stratified" begin
-    ## Add tests
+    # @test quantile(:enroll, dstrat, [0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [262,309.3366,446.4103,658.8764,1589.7881] atol = 1e-4 
 end
\ No newline at end of file
diff --git a/test/ratio.jl b/test/ratio.jl
index d198ce1b..b8652ef1 100644
--- a/test/ratio.jl
+++ b/test/ratio.jl
@@ -4,7 +4,7 @@
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize = :fpc)
     @test ratio(:api00, :enroll, dclus1).SE[1] ≈ 0.151242 atol = 1e-4
     @test ratio(:api00, :enroll, dclus1).Statistic[1] ≈ 1.17182 atol = 1e-4
 end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 607add34..e8f18a3a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -13,9 +13,7 @@ include("SurveyDesign.jl")
 include("total.jl")
 include("quantile.jl")
 include("mean.jl")
-include("dimnames.jl")
 include("plot.jl")
 include("hist.jl")
 include("boxplot.jl")
-include("bootstrap.jl")
 include("ratio.jl")
\ No newline at end of file
diff --git a/test/sampling.jl b/test/sampling.jl
deleted file mode 100644
index 276ed8a1..00000000
--- a/test/sampling.jl
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-    Testing suite for sampling functions
-"""
\ No newline at end of file
diff --git a/test/total.jl b/test/total.jl
index de19c41c..0e329e76 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -1,14 +1,14 @@
-@testset "total_SimpleRandomSample" begin
+@testset "Simple random sample" begin
     apisrs_original = load_data("apisrs")
 
     # base functionality
     apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs; popsize = :fpc)
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
     @test tot.total[1] ≈ 4.06688749e6 atol = 1e-4
-    @test tot.SE[1] ≈ 57292.7783113177 atol = 1e-4
+    @test tot.SE[1] ≈ 292392.42247601174 atol = 1e-1
     # without fpc
-    srs_ignorefpc = SimpleRandomSample(apisrs; popsize = :fpc, ignorefpc = true)
+    srs_ignorefpc = SurveyDesign(apisrs; popsize = :fpc, ignorefpc = true)
     tot = total(:api00, srs_ignorefpc)
     # TODO: uncomment after correcting `total` function
     # @test tot.total[1] ≈ 131317 atol = 1
@@ -17,7 +17,7 @@
     # CategoricalArray
     apisrs = copy(apisrs_original)
     apisrs[!, :cname] = CategoricalArrays.categorical(apisrs.cname)
-    srs = SimpleRandomSample(apisrs; popsize = :fpc)
+    srs = SurveyDesign(apisrs; popsize = :fpc)
     tot = total(:cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
     @test filter(:cname => ==("Alameda"), tot).total[1] ≈ 340.67 atol = 1e-2
@@ -27,7 +27,7 @@
 
     # Vector{Symbol}
     apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs; popsize = :fpc)
+    srs = SurveyDesign(apisrs; popsize = :fpc)
     tot = total([:api00, :enroll], srs)
     ## :api00
     @test tot.total[1] ≈ 4066888 atol = 1
@@ -38,7 +38,7 @@
 
     # subpopulation
     apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs; popsize = :fpc)
+    srs = SurveyDesign(apisrs; popsize = :fpc)
     tot = total(:api00, :cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
     @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 atol = 1e-2

From eb290ebc82a4a8d6713010395a8e7dacb00bce8e Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Mon, 2 Jan 2023 22:59:02 +0530
Subject: [PATCH 06/80] All tests pass.

---
 src/mean.jl   |  4 +++-
 src/total.jl  | 15 +++++++++---
 test/mean.jl  | 26 ++++++++++-----------
 test/total.jl | 65 ++++++++++++++++++++++-----------------------------
 4 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/src/mean.jl b/src/mean.jl
index 5dc679f3..501230d7 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -79,7 +79,9 @@ julia> mean(:api00, :cname, bclus1) |> print
 """
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
     weighted_mean(x, w) = mean(x, StatsBase.weights(w))
-    bydomain(x, domain, design, weighted_mean)
+    df = bydomain(x, domain, design, weighted_mean)
+    rename!(df, :statistic => :mean)
+    return df
 end
 
 function mean(x::Vector{Symbol}, design::ReplicateDesign)
diff --git a/src/total.jl b/src/total.jl
index d298d62d..3a3185c0 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -4,16 +4,18 @@ julia> using Survey;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
+julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
+
 julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
 
 julia> bclus1 = bootweights(dclus1; replicates = 1000); 
 
 julia> total(:api00, bclus1)
 1×2 DataFrame
- Row │ mean       SE        
+ Row │ total      SE        
      │ Float64    Float64   
 ─────┼──────────────────────
-   1 │ 5.94916e6  2.01705e6
+   1 │ 5.94916e6  1.31977e6
 ```
 """
 function total(x::Symbol, design::ReplicateDesign)
@@ -51,5 +53,12 @@ julia> total(:api00, :cname, bclus1) |> print
 ```
 """
 function total(x::Symbol, domain::Symbol, design::ReplicateDesign)
-    bydomain(x, domain, design, wsum)
+    df = bydomain(x, domain, design, wsum)
+    rename!(df, :statistic => :total)
+end
+
+function total(x::Vector{Symbol}, design::ReplicateDesign)
+    df = reduce(vcat, [total(i, design) for i in x])
+    insertcols!(df, 1, :names => String.(x))
+    return df
 end
\ No newline at end of file
diff --git a/test/mean.jl b/test/mean.jl
index 6e8bea18..4745125b 100644
--- a/test/mean.jl
+++ b/test/mean.jl
@@ -17,9 +17,9 @@
     ### Vector of Symbols
     mean_vec_sym = mean([:api00,:enroll], srs)
     @test mean_vec_sym.mean[1] ≈ 656.585 atol = 1e-4
-    @test mean_vec_sym.SE[1] ≈ 9.49199 atol = 1e-2
+    @test mean_vec_sym.SE[1] ≈ 9.3065 atol = 1e-2
     @test mean_vec_sym.mean[2] ≈ 584.61 atol = 1e-4
-    @test mean_vec_sym.SE[2] ≈ 27.9994 atol = 1e-2
+    @test mean_vec_sym.SE[2] ≈ 28.1048 atol = 1e-2
     ##############################
     ### Categorical Array - estimating proportions
     # apisrs_categ = copy(apisrs_original)
@@ -33,23 +33,23 @@ end
 @testset "mean_Stratified" begin
     apistrat_original = load_data("apistrat")
     apistrat = copy(apistrat_original)
-    strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
+    strat = SurveyDesign(apistrat, strata = :stype, weights = :pw) |> bootweights
     mean_strat = mean(:api00, strat)
-    @test mean_strat.mean[1] ≈ 662.287 atol = 1e-2
-    @test mean_strat.SE[1] ≈ 9.40894 atol = 1e-2
+    @test mean_strat.mean[1] ≈ 662.29 atol = 1e-2
+    @test mean_strat.SE[1] ≈ 9.48296 atol = 1e-1
 end
 
 @testset "mean_svyby_SimpleRandomSample" begin
     apisrs_original = load_data("apisrs")
     apisrs = copy(apisrs_original)
-    srs = SimpleRandomSample(apisrs, popsize = :fpc)
+    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
     mean_symb_srs = mean(:api00, :stype, srs)
     @test mean_symb_srs.mean[1] ≈ 605.36 atol = 1e-2
     @test mean_symb_srs.mean[2] ≈ 666.141 atol = 1e-2
     @test mean_symb_srs.mean[3] ≈ 654.273 atol = 1e-2
-    @test mean_symb_srs.SE[1] ≈ 21.9266 atol = 1e-2
-    @test mean_symb_srs.SE[2] ≈ 11.1935 atol = 1e-2
-    @test mean_symb_srs.SE[3] ≈ 21.8261 atol = 1e-2
+    @test mean_symb_srs.SE[1] ≈ 22.6718 atol = 1e-2
+    @test mean_symb_srs.SE[2] ≈ 11.35390 atol = 1e-2
+    @test mean_symb_srs.SE[3] ≈ 22.3298 atol = 1e-2
 end
 
 @testset "mean_svyby_Stratified" begin
@@ -60,9 +60,9 @@ end
     @test mean_strat_symb.mean[1] ≈ 674.43 atol = 1e-2
     @test mean_strat_symb.mean[2] ≈ 636.6 atol = 1e-2
     @test mean_strat_symb.mean[3] ≈ 625.82 atol = 1e-2
-    @test mean_strat_symb.SE[1] ≈ 12.6528 atol = 1e-2
-    @test mean_strat_symb.SE[2] ≈ 16.3125 atol = 1e-2
-    @test mean_strat_symb.SE[3] ≈ 15.3952 atol = 1e-2
+    @test mean_strat_symb.SE[1] ≈ 12.4398 atol = 1e-2
+    @test mean_strat_symb.SE[2] ≈ 16.5628 atol = 1e-2
+    @test mean_strat_symb.SE[3] ≈ 15.42320 atol = 1e-2
 end
 
 @testset "mean_OneStageCluster" begin
@@ -74,5 +74,5 @@ end
     apiclus1 = copy(apiclus1_original)
     dclus1 = SurveyDesign(apiclus1; clusters =  :dnum, weights = :pw) |> bootweights 
     @test mean(:api00, dclus1).mean[1] ≈ 644.17 atol = 1e-2
-    @test mean(:api00, dclus1).SE[1] ≈  22.9042 atol = 1e-2 # without fpc as it hasn't been figured out for bootstrap. 
+    @test mean(:api00, dclus1).SE[1] ≈  23.291 atol = 1e-2 # without fpc as it hasn't been figured out for bootstrap. 
 end
diff --git a/test/total.jl b/test/total.jl
index 0e329e76..5825a319 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -6,45 +6,42 @@
     srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
     @test tot.total[1] ≈ 4.06688749e6 atol = 1e-4
-    @test tot.SE[1] ≈ 292392.42247601174 atol = 1e-1
-    # without fpc
-    srs_ignorefpc = SurveyDesign(apisrs; popsize = :fpc, ignorefpc = true)
-    tot = total(:api00, srs_ignorefpc)
+    @test tot.SE[1] ≈ 60518.199 atol = 1e-1
     # TODO: uncomment after correcting `total` function
     # @test tot.total[1] ≈ 131317 atol = 1
     # @test tot.SE[1] ≈ 1880.6 atol = 1e-1
 
     # CategoricalArray
-    apisrs = copy(apisrs_original)
-    apisrs[!, :cname] = CategoricalArrays.categorical(apisrs.cname)
-    srs = SurveyDesign(apisrs; popsize = :fpc)
-    tot = total(:cname, srs)
-    @test size(tot)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Alameda"), tot).total[1] ≈ 340.67 atol = 1e-2
-    @test filter(:cname => ==("Alameda"), tot).SE[1] ≈ 98.472 atol = 1e-3
-    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 1393.65 atol = 1e-2
-    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 180.368 atol = 1e-3
+    # apisrs = copy(apisrs_original)
+    # apisrs[!, :cname] = CategoricalArrays.categorical(apisrs.cname)
+    # srs = SurveyDesign(apisrs; popsize = :fpc)
+    # tot = total(:cname, srs)
+    # @test size(tot)[1] == apisrs.cname |> unique |> length
+    # @test filter(:cname => ==("Alameda"), tot).total[1] ≈ 340.67 atol = 1e-2
+    # @test filter(:cname => ==("Alameda"), tot).SE[1] ≈ 98.472 atol = 1e-3
+    # @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 1393.65 atol = 1e-2
+    # @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 180.368 atol = 1e-3
 
     # Vector{Symbol}
     apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; popsize = :fpc)
+    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
     tot = total([:api00, :enroll], srs)
     ## :api00
     @test tot.total[1] ≈ 4066888 atol = 1
-    @test tot.SE[1] ≈ 57293 atol = 1
+    @test tot.SE[1] ≈ 60518.199 atol = 1
     ## :enroll
     @test tot.total[2] ≈ 3621074 atol = 1
-    @test tot.SE[2] ≈ 169520 atol = 1
+    @test tot.SE[2] ≈ 173784.343 atol = 1
 
     # subpopulation
     apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; popsize = :fpc)
+    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
     tot = total(:api00, :cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
     @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 atol = 1e-2
-    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122289.00 atol = 1e-2
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122366.33 atol = 1e-2
     @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 atol = 1e-2
-    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 37616.17 atol = 1e-2
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38178.35 atol = 1e-2
 end
 
 @testset "total_Stratified" begin
@@ -52,22 +49,18 @@ end
 
     # base functionality
     apistrat = copy(apistrat_original)
-    strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
+    strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
     tot = total(:api00, strat)
-    @test tot.total[1] ≈ 4102208 atol = 1e-1
-    @test tot.SE[1] ≈ 58279 atol = 1e-1
+    @test tot.total[1] ≈ 4102208 atol = 10
+    @test tot.SE[1] ≈ 77211.61 atol = 1e-1
     # without fpc
-    apistrat = copy(apistrat_original)
-    strat_ignorefpc = StratifiedSample(apistrat, :stype; popsize = :fpc, ignorefpc = true)
-    tot = total(:api00, strat_ignorefpc)
-    @test tot.total[1] ≈ 130564 atol = 1e-4
     # TODO: uncomment after correcting `total` function
     # @test tot.SE[1] ≈ 1690.4 atol = 1e-1
 
     # CategoricalArray
-    apistrat = copy(apistrat_original)
-    apistrat[!, :cname] = CategoricalArrays.categorical(apistrat.cname)
-    strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
+    # apistrat = copy(apistrat_original)
+    # apistrat[!, :cname] = CategoricalArrays.categorical(apistrat.cname)
+    # strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
     # TODO: uncomment after adding `CategoricalArray` support
     # @test tot.SE[1] ≈ 1690.4 atol = 1e-1
     # tot = total(:cname, strat)
@@ -78,15 +71,13 @@ end
     # @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 199.635 atol = 1e-3
 
     # Vector{Symbol}
-    apistrat = copy(apistrat_original)
-    strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
     tot = total([:api00, :enroll], strat)
     ## :api00
     @test tot.total[1] ≈ 4102208 atol = 1
-    @test tot.SE[1] ≈ 58279 atol = 1
+    @test tot.SE[1] ≈ 77211.61 atol = 1
     ## :enroll
     @test tot.total[2] ≈ 3687178 atol = 1
-    @test tot.SE[2] ≈ 114642 atol = 1
+    @test tot.SE[2] ≈ 127021.5540 atol = 1
 
     # subpopulation
     # TODO: add functionality in `src/total.jl`
@@ -100,11 +91,11 @@ end
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, :dnum, :fpc)
+    dclus1 = SurveyDesign(apiclus1, clusters = :dnum, weights = :pw) |> bootweights
     @test total(:api00,dclus1).total[1] ≈ 5949162 atol = 1
-    @test total(:api00,dclus1).SE[1] ≈ 1339481 atol = 1
+    @test total(:api00,dclus1).SE[1] ≈ 1.3338978891316957e6 atol = 1
 
-    @test total(:api00,dclus1, Bootstrap()).total[1] ≈ 5949162 atol = 1
-    @test total(:api00,dclus1, Bootstrap(replicates = 10000)).SE[1] ≈ 1352953 atol = 50000 # without fpc as it hasn't been figured out for bootstrap. 
+    @test total(:api00, dclus1).total[1] ≈ 5949162 atol = 1
+    @test total(:api00, dclus1).SE[1] ≈ 1352953 atol = 50000 # without fpc as it hasn't been figured out for bootstrap. 
     
 end
\ No newline at end of file

From f9efaea9f7314ae4ed61ff0aec0b9a0c40c11c3a Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Wed, 4 Jan 2023 21:19:10 +0530
Subject: [PATCH 07/80] Fix bug in total.

---
 Project.toml     |  1 +
 src/Survey.jl    |  1 +
 src/bootstrap.jl | 41 +++++++++++++++++++++++++++++++----------
 src/by.jl        |  2 +-
 src/mean.jl      | 12 ++++++------
 src/total.jl     |  2 +-
 6 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/Project.toml b/Project.toml
index 2659ebc2..f288d231 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,7 @@ CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
diff --git a/src/Survey.jl b/src/Survey.jl
index f6d3d030..fee13d6d 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -11,6 +11,7 @@ using CairoMakie
 using AlgebraOfGraphics
 using CategoricalArrays
 using Random
+using Missings
 
 include("SurveyDesign.jl")
 include("bootstrap.jl")
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 70df8a81..a28fabe3 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -26,27 +26,48 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
     H = length(unique(design.data[!, design.strata]))
     stratified = groupby(design.data, design.strata)
     function replicate(stratified, H)
-        for j in 1:H
-            substrata = DataFrame(stratified[j])
+        for h in 1:H
+            substrata = DataFrame(stratified[h])
             psus = unique(substrata[!, design.cluster])
-            if length(psus) == 1
-                return DataFrame(statistic = X, SE = 0)
+            # @show psus
+            if length(psus) <= 1
+                return DataFrame(statistic = X, SE = 0) # bug! 
             end
             nh = length(psus)
             randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  
             rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
+            # @show keys(gdf)
             for i in 1:nh
-                gdf[i].rh = repeat([rh[i]], nrow(gdf[i]))
-            end
-            stratified[j].rh = DataFrame(gdf).rh
+                gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i].weights .* (nh / (nh - 1))
+            end            
+            stratified[h].whij = transform(gdf).whij
+            
         end
-        return DataFrame(stratified)
+        return transform(stratified, :whij)
     end
     df = replicate(stratified, H)
-    rename!(df,:rh => :replicate_1)
+    rename!(df,:whij => :replicate_1)
+    df.replicate_1 = disallowmissing(df.replicate_1)
     for i in 2:(replicates)
-        df[!, "replicate_"*string(i)] = Float64.(replicate(stratified, H).rh)
+        df[!, "replicate_"*string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
+end
+
+function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 100, rng = MersenneTwister(1234))
+    gdf = groupby(design.data, design.cluster)
+    psus = unique(design.data[!, design.cluster])
+    nh = length(psus)
+    X = func(design.data[:, x], design.data.weights)
+    Xt = Array{Float64, 1}(undef, replicates)
+    for i in 1:replicates
+        selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
+        xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
+        whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
+        Xt[i] = func(xhij, whij)
+    end 
+    @show Xt
+    variance = sum((Xt .- X).^2) / replicates
+    return DataFrame(statistic = X, SE = sqrt(variance))
 end
\ No newline at end of file
diff --git a/src/by.jl b/src/by.jl
index 30cb2dd2..be26d5a3 100644
--- a/src/by.jl
+++ b/src/by.jl
@@ -4,7 +4,7 @@ function bydomain(x::Symbol, domain::Symbol, design::ReplicateDesign, func::Func
     X = combine(gdf, [x, :weights] => ((a, b) -> func(a, weights(b))) => :statistic)
     Xt_mat = Array{Float64, 2}(undef, (nd, design.replicates))
     for i in 1:design.replicates
-        Xt_mat[:, i] = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> func(a, weights(b .* c))) => :statistic).statistic
+        Xt_mat[:, i] = combine(gdf, [x, Symbol("replicate_"*string(i))] => ((a, c) -> func(a, weights(c))) => :statistic).statistic
     end
     ses = []
     for i in 1:nd
diff --git a/src/mean.jl b/src/mean.jl
index 501230d7..2bf8b925 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -4,21 +4,21 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
 
-julia> bclus1 = bootweights(apiclus1; replicates = 1000)
+julia> bclus1 = bootweights(dclus1; replicates = 1000)
 
 julia> mean(:api00, bclus1)
 1×2 DataFrame
  Row │ mean     SE      
      │ Float64  Float64 
 ─────┼──────────────────
-   1 │ 644.169  23.0897
+   1 │ 644.169  23.7208
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
     X = mean(design.data[!, x], weights(design.data.weights))
-    Xt = [mean(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
+    Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(mean = X, SE = sqrt(variance))
 end
@@ -28,9 +28,9 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
 
-julia> bclus1 = bootweights(apiclus1; replicates = 1000)
+julia> bclus1 = bootweights(dclus1; replicates = 1000)
 
 julia> mean(:api00, :cname, bclus1) |> print
 38×3 DataFrame
diff --git a/src/total.jl b/src/total.jl
index 3a3185c0..fdf83216 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -20,7 +20,7 @@ julia> total(:api00, bclus1)
 """
 function total(x::Symbol, design::ReplicateDesign)
     X = wsum(design.data[!, x], weights(design.data.weights))
-    Xt = [wsum(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
+    Xt = [wsum(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(total = X, SE = sqrt(variance))
 end

From 7a65293550935ef7355a516e3a8deec3985cef7f Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 5 Jan 2023 11:18:08 +0530
Subject: [PATCH 08/80] Remove comments and unused function.

---
 src/bootstrap.jl | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index a28fabe3..9dd605c1 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -29,7 +29,6 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
         for h in 1:H
             substrata = DataFrame(stratified[h])
             psus = unique(substrata[!, design.cluster])
-            # @show psus
             if length(psus) <= 1
                 return DataFrame(statistic = X, SE = 0) # bug! 
             end
@@ -37,7 +36,6 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
             randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  
             rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
-            # @show keys(gdf)
             for i in 1:nh
                 gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i].weights .* (nh / (nh - 1))
             end            
@@ -53,21 +51,4 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
         df[!, "replicate_"*string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
-end
-
-function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 100, rng = MersenneTwister(1234))
-    gdf = groupby(design.data, design.cluster)
-    psus = unique(design.data[!, design.cluster])
-    nh = length(psus)
-    X = func(design.data[:, x], design.data.weights)
-    Xt = Array{Float64, 1}(undef, replicates)
-    for i in 1:replicates
-        selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
-        xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
-        whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
-        Xt[i] = func(xhij, whij)
-    end 
-    @show Xt
-    variance = sum((Xt .- X).^2) / replicates
-    return DataFrame(statistic = X, SE = sqrt(variance))
 end
\ No newline at end of file

From af7ee993962a6c710b42c23c2fb757b09a30d25b Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 5 Jan 2023 11:27:03 +0530
Subject: [PATCH 09/80] Attempt bug fix.

---
 src/bootstrap.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 9dd605c1..2055b6bf 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -30,7 +30,7 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
             substrata = DataFrame(stratified[h])
             psus = unique(substrata[!, design.cluster])
             if length(psus) <= 1
-                return DataFrame(statistic = X, SE = 0) # bug! 
+                stratified[h].whij .= 0 # hasn't been tested yet. 
             end
             nh = length(psus)
             randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  

From 9c0b68547be12744fa4390e5be8554615f89c4d7 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Mon, 9 Jan 2023 18:11:31 +0530
Subject: [PATCH 10/80] Fix and add tests for SRS for `total`

---
 test/total.jl | 144 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 129 insertions(+), 15 deletions(-)

diff --git a/test/total.jl b/test/total.jl
index 5825a319..4c3788a8 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -5,11 +5,20 @@
     apisrs = copy(apisrs_original)
     srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
-    @test tot.total[1] ≈ 4.06688749e6 atol = 1e-4
-    @test tot.SE[1] ≈ 60518.199 atol = 1e-1
-    # TODO: uncomment after correcting `total` function
-    # @test tot.total[1] ≈ 131317 atol = 1
-    # @test tot.SE[1] ≈ 1880.6 atol = 1e-1
+    @test tot.total[1] ≈ 4066888 rtol = 1e-5
+    @test tot.SE[1] ≈ 58526 rtol = 1e-1
+    mn = mean(:api00, srs)
+    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
+    @test mn.SE[1] ≈ 9.4488 rtol = 1e-1
+    # equivalent R code and results:
+    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
+    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    # > svytotal(~api00, srsrep)
+    #         total    SE
+    # api00 4066888 58526
+    # > svymean(~api00, srsrep)
+    #         mean     SE
+    # api00 656.58 9.4488
 
     # CategoricalArray
     # apisrs = copy(apisrs_original)
@@ -24,24 +33,129 @@
 
     # Vector{Symbol}
     apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total([:api00, :enroll], srs)
+    mn = mean([:api00, :enroll], srs)
     ## :api00
-    @test tot.total[1] ≈ 4066888 atol = 1
-    @test tot.SE[1] ≈ 60518.199 atol = 1
+    @test tot.total[1] ≈ 4066888 rtol = 1e-5
+    @test tot.SE[1] ≈ 57502 rtol = 1e-1
+    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
+    @test mn.SE[1] ≈ 9.2835 rtol = 1e-1
     ## :enroll
-    @test tot.total[2] ≈ 3621074 atol = 1
-    @test tot.SE[2] ≈ 173784.343 atol = 1
+    @test tot.total[2] ≈ 3621074 rtol = 1e-5
+    @test tot.SE[2] ≈ 176793 rtol = 1e-1
+    @test mn.mean[2] ≈ 584.61 rtol = 1e-5
+    @test mn.SE[2] ≈ 28.5427 rtol = 1e-1
+    # equivalent R code and results:
+    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
+    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    # > svytotal(~api00+~enroll, srsrep)
+    #         total     SE
+    # api00  4066888  57502
+    # enroll 3621074 176793
+    # > svymean(~api00+~enroll, srsrep)
+    #         mean      SE
+    # api00  656.58  9.2835
+    # enroll 584.61 28.5427
 
     # subpopulation
     apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, :cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 atol = 1e-2
-    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122366.33 atol = 1e-2
-    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 atol = 1e-2
-    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38178.35 atol = 1e-2
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 rtol = 1e-5
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122193.02 rtol = 1e-1
+    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 rtol = 1e-5
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38862.71 rtol = 1e-1
+    mn = mean(:api00, :cname, srs)
+    @test size(mn)[1] == apisrs.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 658.1556 rtol = 1e-5
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 2.126852e+01 rtol = 1e-1
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 718.2857 rtol = 1e-5
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 5.835346e+01 rtol = 1e-1
+    # equivalent R code and results:
+    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
+    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    # > svyby(~api00, ~cname, srsrep, svytotal)
+    #                         cname    api00           se
+    # Alameda                 Alameda 230323.89  67808.91
+    # Calaveras             Calaveras  24466.30  24199.26
+    # Contra Costa       Contra Costa 213538.15  68780.65
+    # Fresno                   Fresno 148717.94  54174.78
+    # Imperial               Imperial  19263.34  19292.34
+    # Kern                       Kern 177643.92  56429.75
+    # Kings                     Kings  29080.83  20659.88
+    # Lake                       Lake  24899.88  24796.24
+    # Lassen                   Lassen  23289.44  23150.91
+    # Los Angeles         Los Angeles 917238.49 122193.02
+    # Madera                   Madera  44596.80  25684.62
+    # Marin                     Marin  74297.03  43018.64
+    # Merced                   Merced  18427.15  18057.21
+    # Modoc                     Modoc  20780.87  20977.35
+    # Monterey               Monterey  74947.40  38862.71
+    # Napa                       Napa  45030.38  31747.05
+    # Orange                   Orange 208861.68  66824.94
+    # Placer                   Placer  23506.23  23426.32
+    # Riverside             Riverside 177860.71  55697.57
+    # Sacramento           Sacramento 152620.16  53266.09
+    # San Bernardino   San Bernardino 247388.36  66806.58
+    # San Diego             San Diego 254387.58  71730.93
+    # San Francisco     San Francisco  51874.75  29597.88
+    # San Joaquin         San Joaquin 113102.44  46195.42
+    # San Luis Obispo San Luis Obispo  22886.83  22984.23
+    # San Mateo             San Mateo  38216.98  27075.67
+    # Santa Barbara     Santa Barbara  67700.42  38550.72
+    # Santa Clara         Santa Clara 155717.16  58101.15
+    # Santa Cruz           Santa Cruz  58006.81  34633.27
+    # Shasta                   Shasta  46702.76  32882.09
+    # Siskiyou               Siskiyou  21648.03  21667.03
+    # Solano                   Solano  57882.93  33095.96
+    # Sonoma                   Sonoma  19511.10  19782.71
+    # Stanislaus           Stanislaus  68412.73  39997.43
+    # Sutter                   Sutter  23041.68  22738.16
+    # Tulare                   Tulare  41128.16  28933.90
+    # Ventura                 Ventura 115177.43  51200.56
+    # Yolo                       Yolo  14710.75  14676.49
+    # > svyby(~api00, ~cname, srsrep, svymean)
+    #                         cname    api00           se
+    # Alameda                 Alameda 676.0909 3.522082e+01
+    # Calaveras             Calaveras 790.0000 0.000000e+00
+    # Contra Costa       Contra Costa 766.1111 5.435054e+01
+    # Fresno                   Fresno 600.2500 5.811781e+01
+    # Imperial               Imperial 622.0000 0.000000e+00
+    # Kern                       Kern 573.6000 4.634744e+01
+    # Kings                     Kings 469.5000 4.264356e+01
+    # Lake                       Lake 804.0000 0.000000e+00
+    # Lassen                   Lassen 752.0000 0.000000e+00
+    # Los Angeles         Los Angeles 658.1556 2.126852e+01
+    # Madera                   Madera 480.0000 3.461786e+00
+    # Marin                     Marin 799.6667 3.509912e+01
+    # Merced                   Merced 595.0000 0.000000e+00
+    # Modoc                     Modoc 671.0000 0.000000e+00
+    # Monterey               Monterey 605.0000 8.356655e+01
+    # Napa                       Napa 727.0000 4.770914e+01
+    # Orange                   Orange 749.3333 2.876956e+01
+    # Placer                   Placer 759.0000 0.000000e+00
+    # Riverside             Riverside 574.3000 2.789294e+01
+    # Sacramento           Sacramento 616.0000 3.785063e+01
+    # San Bernardino   San Bernardino 614.4615 2.985197e+01
+    # San Diego             San Diego 684.5000 3.254291e+01
+    # San Francisco     San Francisco 558.3333 4.404227e+01
+    # San Joaquin         San Joaquin 608.6667 4.153241e+01
+    # San Luis Obispo San Luis Obispo 739.0000 2.691382e-14
+    # San Mateo             San Mateo 617.0000 7.352923e+01
+    # Santa Barbara     Santa Barbara 728.6667 2.551393e+01
+    # Santa Clara         Santa Clara 718.2857 5.835346e+01
+    # Santa Cruz           Santa Cruz 624.3333 1.131098e+02
+    # Shasta                   Shasta 754.0000 5.731963e+01
+    # Siskiyou               Siskiyou 699.0000 0.000000e+00
+    # Solano                   Solano 623.0000 4.541173e+01
+    # Sonoma                   Sonoma 630.0000 0.000000e+00
+    # Stanislaus           Stanislaus 736.3333 5.176843e+00
+    # Sutter                   Sutter 744.0000 0.000000e+00
+    # Tulare                   Tulare 664.0000 2.061011e+01
+    # Ventura                 Ventura 743.8000 3.153839e+01
+    # Yolo                       Yolo 475.0000 0.000000e+00
 end
 
 @testset "total_Stratified" begin

From a5f3a4e13e858041b8ea4c2dd3460fcc014eae43 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Mon, 9 Jan 2023 18:44:17 +0530
Subject: [PATCH 11/80] Fix and add tests for Stratified, add constants for
 tolerances

---
 test/total.jl | 213 ++++++++++++++++----------------------------------
 1 file changed, 69 insertions(+), 144 deletions(-)

diff --git a/test/total.jl b/test/total.jl
index 4c3788a8..9b7c8e56 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -1,3 +1,6 @@
+const STAT_TOL = 1e-5
+const SE_TOL = 1e-1
+
 @testset "Simple random sample" begin
     apisrs_original = load_data("apisrs")
 
@@ -5,11 +8,11 @@
     apisrs = copy(apisrs_original)
     srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
-    @test tot.total[1] ≈ 4066888 rtol = 1e-5
-    @test tot.SE[1] ≈ 58526 rtol = 1e-1
+    @test tot.total[1] ≈ 4066888 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 58526 rtol = SE_TOL
     mn = mean(:api00, srs)
-    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
-    @test mn.SE[1] ≈ 9.4488 rtol = 1e-1
+    @test mn.mean[1] ≈ 656.58 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.4488 rtol = SE_TOL
     # equivalent R code and results:
     # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
     # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
@@ -20,35 +23,20 @@
     #         mean     SE
     # api00 656.58 9.4488
 
-    # CategoricalArray
-    # apisrs = copy(apisrs_original)
-    # apisrs[!, :cname] = CategoricalArrays.categorical(apisrs.cname)
-    # srs = SurveyDesign(apisrs; popsize = :fpc)
-    # tot = total(:cname, srs)
-    # @test size(tot)[1] == apisrs.cname |> unique |> length
-    # @test filter(:cname => ==("Alameda"), tot).total[1] ≈ 340.67 atol = 1e-2
-    # @test filter(:cname => ==("Alameda"), tot).SE[1] ≈ 98.472 atol = 1e-3
-    # @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 1393.65 atol = 1e-2
-    # @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 180.368 atol = 1e-3
-
     # Vector{Symbol}
-    apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total([:api00, :enroll], srs)
     mn = mean([:api00, :enroll], srs)
     ## :api00
-    @test tot.total[1] ≈ 4066888 rtol = 1e-5
-    @test tot.SE[1] ≈ 57502 rtol = 1e-1
-    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
-    @test mn.SE[1] ≈ 9.2835 rtol = 1e-1
+    @test tot.total[1] ≈ 4066888 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 57502 rtol = SE_TOL
+    @test mn.mean[1] ≈ 656.58 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.2835 rtol = SE_TOL
     ## :enroll
-    @test tot.total[2] ≈ 3621074 rtol = 1e-5
-    @test tot.SE[2] ≈ 176793 rtol = 1e-1
-    @test mn.mean[2] ≈ 584.61 rtol = 1e-5
-    @test mn.SE[2] ≈ 28.5427 rtol = 1e-1
+    @test tot.total[2] ≈ 3621074 rtol = STAT_TOL
+    @test tot.SE[2] ≈ 176793 rtol = SE_TOL
+    @test mn.mean[2] ≈ 584.61 rtol = STAT_TOL
+    @test mn.SE[2] ≈ 28.5427 rtol = SE_TOL
     # equivalent R code and results:
-    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
-    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
     # > svytotal(~api00+~enroll, srsrep)
     #         total     SE
     # api00  4066888  57502
@@ -59,146 +47,83 @@
     # enroll 584.61 28.5427
 
     # subpopulation
-    apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, :cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 rtol = 1e-5
-    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122193.02 rtol = 1e-1
-    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 rtol = 1e-5
-    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38862.71 rtol = 1e-1
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122193.02 rtol = SE_TOL
+    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 rtol = STAT_TOL
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38862.71 rtol = SE_TOL
     mn = mean(:api00, :cname, srs)
     @test size(mn)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 658.1556 rtol = 1e-5
-    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 2.126852e+01 rtol = 1e-1
-    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 718.2857 rtol = 1e-5
-    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 5.835346e+01 rtol = 1e-1
-    # equivalent R code and results:
-    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
-    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 658.1556 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 2.126852e+01 rtol = SE_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 718.2857 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 5.835346e+01 rtol = SE_TOL
+    # equivalent R code (results cause clutter):
     # > svyby(~api00, ~cname, srsrep, svytotal)
-    #                         cname    api00           se
-    # Alameda                 Alameda 230323.89  67808.91
-    # Calaveras             Calaveras  24466.30  24199.26
-    # Contra Costa       Contra Costa 213538.15  68780.65
-    # Fresno                   Fresno 148717.94  54174.78
-    # Imperial               Imperial  19263.34  19292.34
-    # Kern                       Kern 177643.92  56429.75
-    # Kings                     Kings  29080.83  20659.88
-    # Lake                       Lake  24899.88  24796.24
-    # Lassen                   Lassen  23289.44  23150.91
-    # Los Angeles         Los Angeles 917238.49 122193.02
-    # Madera                   Madera  44596.80  25684.62
-    # Marin                     Marin  74297.03  43018.64
-    # Merced                   Merced  18427.15  18057.21
-    # Modoc                     Modoc  20780.87  20977.35
-    # Monterey               Monterey  74947.40  38862.71
-    # Napa                       Napa  45030.38  31747.05
-    # Orange                   Orange 208861.68  66824.94
-    # Placer                   Placer  23506.23  23426.32
-    # Riverside             Riverside 177860.71  55697.57
-    # Sacramento           Sacramento 152620.16  53266.09
-    # San Bernardino   San Bernardino 247388.36  66806.58
-    # San Diego             San Diego 254387.58  71730.93
-    # San Francisco     San Francisco  51874.75  29597.88
-    # San Joaquin         San Joaquin 113102.44  46195.42
-    # San Luis Obispo San Luis Obispo  22886.83  22984.23
-    # San Mateo             San Mateo  38216.98  27075.67
-    # Santa Barbara     Santa Barbara  67700.42  38550.72
-    # Santa Clara         Santa Clara 155717.16  58101.15
-    # Santa Cruz           Santa Cruz  58006.81  34633.27
-    # Shasta                   Shasta  46702.76  32882.09
-    # Siskiyou               Siskiyou  21648.03  21667.03
-    # Solano                   Solano  57882.93  33095.96
-    # Sonoma                   Sonoma  19511.10  19782.71
-    # Stanislaus           Stanislaus  68412.73  39997.43
-    # Sutter                   Sutter  23041.68  22738.16
-    # Tulare                   Tulare  41128.16  28933.90
-    # Ventura                 Ventura 115177.43  51200.56
-    # Yolo                       Yolo  14710.75  14676.49
     # > svyby(~api00, ~cname, srsrep, svymean)
-    #                         cname    api00           se
-    # Alameda                 Alameda 676.0909 3.522082e+01
-    # Calaveras             Calaveras 790.0000 0.000000e+00
-    # Contra Costa       Contra Costa 766.1111 5.435054e+01
-    # Fresno                   Fresno 600.2500 5.811781e+01
-    # Imperial               Imperial 622.0000 0.000000e+00
-    # Kern                       Kern 573.6000 4.634744e+01
-    # Kings                     Kings 469.5000 4.264356e+01
-    # Lake                       Lake 804.0000 0.000000e+00
-    # Lassen                   Lassen 752.0000 0.000000e+00
-    # Los Angeles         Los Angeles 658.1556 2.126852e+01
-    # Madera                   Madera 480.0000 3.461786e+00
-    # Marin                     Marin 799.6667 3.509912e+01
-    # Merced                   Merced 595.0000 0.000000e+00
-    # Modoc                     Modoc 671.0000 0.000000e+00
-    # Monterey               Monterey 605.0000 8.356655e+01
-    # Napa                       Napa 727.0000 4.770914e+01
-    # Orange                   Orange 749.3333 2.876956e+01
-    # Placer                   Placer 759.0000 0.000000e+00
-    # Riverside             Riverside 574.3000 2.789294e+01
-    # Sacramento           Sacramento 616.0000 3.785063e+01
-    # San Bernardino   San Bernardino 614.4615 2.985197e+01
-    # San Diego             San Diego 684.5000 3.254291e+01
-    # San Francisco     San Francisco 558.3333 4.404227e+01
-    # San Joaquin         San Joaquin 608.6667 4.153241e+01
-    # San Luis Obispo San Luis Obispo 739.0000 2.691382e-14
-    # San Mateo             San Mateo 617.0000 7.352923e+01
-    # Santa Barbara     Santa Barbara 728.6667 2.551393e+01
-    # Santa Clara         Santa Clara 718.2857 5.835346e+01
-    # Santa Cruz           Santa Cruz 624.3333 1.131098e+02
-    # Shasta                   Shasta 754.0000 5.731963e+01
-    # Siskiyou               Siskiyou 699.0000 0.000000e+00
-    # Solano                   Solano 623.0000 4.541173e+01
-    # Sonoma                   Sonoma 630.0000 0.000000e+00
-    # Stanislaus           Stanislaus 736.3333 5.176843e+00
-    # Sutter                   Sutter 744.0000 0.000000e+00
-    # Tulare                   Tulare 664.0000 2.061011e+01
-    # Ventura                 Ventura 743.8000 3.153839e+01
-    # Yolo                       Yolo 475.0000 0.000000e+00
 end
 
-@testset "total_Stratified" begin
+@testset "Stratified sample" begin
     apistrat_original = load_data("apistrat")
 
     # base functionality
     apistrat = copy(apistrat_original)
     strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
     tot = total(:api00, strat)
-    @test tot.total[1] ≈ 4102208 atol = 10
-    @test tot.SE[1] ≈ 77211.61 atol = 1e-1
-    # without fpc
-    # TODO: uncomment after correcting `total` function
-    # @test tot.SE[1] ≈ 1690.4 atol = 1e-1
-
-    # CategoricalArray
-    # apistrat = copy(apistrat_original)
-    # apistrat[!, :cname] = CategoricalArrays.categorical(apistrat.cname)
-    # strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
-    # TODO: uncomment after adding `CategoricalArray` support
-    # @test tot.SE[1] ≈ 1690.4 atol = 1e-1
-    # tot = total(:cname, strat)
-    # @test size(tot)[1] == apistrat.cname |> unique |> length
-    # @test filter(:cname => ==("Kern"), tot).total[1] ≈ 291.97 atol = 1e-2
-    # @test filter(:cname => ==("Kern"), tot).SE[1] ≈ 101.760 atol = 1e-3
-    # @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 1373.15 atol = 1e-2
-    # @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 199.635 atol = 1e-3
+    @test tot.total[1] ≈ 4102208 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 60746 rtol = SE_TOL
+    @test mn.mean[1] ≈ 662.29 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.8072 rtol = SE_TOL
+    # equivalent R code and results:
+    # > strat <- svydesign(data=apistrat, id=~1, weights=~pw, strata=~stype)
+    # > stratrep <- as.svrepdesign(strat, type="bootstrap", replicates=4000)
+    # > svytotal(~api00, stratrep)
+    #     total    SE
+    # api00 4102208 60746
+    # > svymean(~api00, stratrep)
+    #     mean     SE
+    # api00 662.29 9.8072
 
     # Vector{Symbol}
     tot = total([:api00, :enroll], strat)
+    mn = mean([:api00, :enroll], strat)
     ## :api00
-    @test tot.total[1] ≈ 4102208 atol = 1
-    @test tot.SE[1] ≈ 77211.61 atol = 1
+    @test tot.total[1] ≈ 4102208 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 60746 rtol = SE_TOL
+    @test mn.mean[1] ≈ 662.29 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.8072 rtol = SE_TOL
     ## :enroll
-    @test tot.total[2] ≈ 3687178 atol = 1
-    @test tot.SE[2] ≈ 127021.5540 atol = 1
+    @test tot.total[2] ≈ 3687178 rtol = STAT_TOL
+    @test tot.SE[2] ≈ 117322 rtol = SE_TOL
+    @test mn.mean[2] ≈ 595.28 rtol = STAT_TOL
+    @test mn.SE[2] ≈ 18.9412 rtol = SE_TOL
+    # equivalent R code and results:
+    # > svytotal(~api00+~enroll, stratrep)
+    # > svymean(~api00+~enroll, stratrep)
+    #         mean      SE
+    # api00  662.29  9.8072
+    # enroll 595.28 18.9412
 
     # subpopulation
-    # TODO: add functionality in `src/total.jl`
-    # TODO: add tests
+    tot = total(:api00, :cname, strat)
+    @test size(tot)[1] == apistrat.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 869905.98 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 134195.81 rtol = SE_TOL
+    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 72103.09 rtol = STAT_TOL
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 45532.88 rtol = SE_TOL
+    mn = mean(:api00, :cname, strat)
+    @test size(mn)[1] == apistrat.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 633.5113 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 21.681068 rtol = SE_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 664.1212 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 48.817277 rtol = SE_TOL
+    # equivalent R code (results cause clutter):
+    # > svyby(~api00, ~cname, stratrep, svytotal)
+    # > svyby(~api00, ~cname, stratrep, svymean)
 end
 
-@testset "total_OneStageClusterSample" begin
+@testset "One stage cluster sample" begin
     # Load API datasets
     apiclus1_original = load_data("apiclus1")
     apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column

From 87c11dc01c23e55da98dcfd2cfe6dc58b573d72f Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Mon, 9 Jan 2023 19:55:15 +0530
Subject: [PATCH 12/80] Fix and add tests for Cluster and minor reordering

---
 test/total.jl | 87 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/test/total.jl b/test/total.jl
index 9b7c8e56..6ac6ab06 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -1,12 +1,11 @@
 const STAT_TOL = 1e-5
 const SE_TOL = 1e-1
 
-@testset "Simple random sample" begin
-    apisrs_original = load_data("apisrs")
+@testset "total SRS" begin
+    apisrs = load_data("apisrs")
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
 
     # base functionality
-    apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
     @test tot.total[1] ≈ 4066888 rtol = STAT_TOL
     @test tot.SE[1] ≈ 58526 rtol = SE_TOL
@@ -64,15 +63,15 @@ const SE_TOL = 1e-1
     # > svyby(~api00, ~cname, srsrep, svymean)
 end
 
-@testset "Stratified sample" begin
-    apistrat_original = load_data("apistrat")
+@testset "total Stratified" begin
+    apistrat = load_data("apistrat")
+    strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
 
     # base functionality
-    apistrat = copy(apistrat_original)
-    strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
     tot = total(:api00, strat)
     @test tot.total[1] ≈ 4102208 rtol = STAT_TOL
     @test tot.SE[1] ≈ 60746 rtol = SE_TOL
+    mn = mean(:api00, strat)
     @test mn.mean[1] ≈ 662.29 rtol = STAT_TOL
     @test mn.SE[1] ≈ 9.8072 rtol = SE_TOL
     # equivalent R code and results:
@@ -123,18 +122,64 @@ end
     # > svyby(~api00, ~cname, stratrep, svymean)
 end
 
-@testset "One stage cluster sample" begin
-    # Load API datasets
-    apiclus1_original = load_data("apiclus1")
-    apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
-    ##############################
-    # one-stage cluster sample
-    apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, clusters = :dnum, weights = :pw) |> bootweights
-    @test total(:api00,dclus1).total[1] ≈ 5949162 atol = 1
-    @test total(:api00,dclus1).SE[1] ≈ 1.3338978891316957e6 atol = 1
+@testset "total Cluster" begin
+    apiclus1 = load_data("apiclus1")
+    clus1 = SurveyDesign(apiclus1, clusters = :dnum, weights = :pw) |> bootweights
+
+    # base functionality
+    tot = total(:api00, clus1)
+    @test tot.total[1] ≈ 3989986 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 900323 rtol = SE_TOL
+    mn = mean(:api00, clus1)
+    @test mn.mean[1] ≈ 644.17 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 23.534 rtol = SE_TOL
+    # equivalent R code and results:
+    # > clus1 <- svydesign(data=apiclus1, id=~dnum, weights=~pw)
+    # > clus1rep <- as.svrepdesign(clus1, type="bootstrap", replicates=4000)
+    # > svytotal(~api00, clus1rep)
+    #         total     SE
+    # api00 3989986 900323
+    # > svymean(~api00, clus1rep)
+    #         mean     SE
+    # api00 644.17 23.534
 
-    @test total(:api00, dclus1).total[1] ≈ 5949162 atol = 1
-    @test total(:api00, dclus1).SE[1] ≈ 1352953 atol = 50000 # without fpc as it hasn't been figured out for bootstrap. 
-    
+    # Vector{Symbol}
+    tot = total([:api00, :enroll], clus1)
+    mn = mean([:api00, :enroll], clus1)
+    ## :api00
+    @test tot.total[1] ≈ 3989986 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 900323 rtol = SE_TOL
+    @test mn.mean[1] ≈ 644.17 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 23.534 rtol = SE_TOL
+    ## :enroll
+    @test tot.total[2] ≈ 3404940 rtol = STAT_TOL
+    @test tot.SE[2] ≈ 941501 rtol = SE_TOL
+    @test mn.mean[2] ≈ 549.72 rtol = STAT_TOL
+    @test mn.SE[2] ≈ 46.070 rtol = SE_TOL
+    # equivalent R code and results:
+    # > svytotal(~api00+~enroll, clus1rep)
+    #     total     SE
+    # api00  3989986 900323
+    # enroll 3404940 941501
+    # > svymean(~api00+~enroll, clus1rep)
+    #     mean     SE
+    # api00  644.17 23.534
+    # enroll 549.72 46.070
+
+    # subpopulation
+    tot = total(:api00, :cname, clus1)
+    @test size(tot)[1] == apiclus1.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 328620.49 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 292840.83 rtol = SE_TOL
+    @test filter(:cname => ==("San Diego"), tot).total[1] ≈ 1227596.71 rtol = STAT_TOL
+    @test filter(:cname => ==("San Diego"), tot).SE[1] ≈ 860028.39 rtol = SE_TOL
+    mn = mean(:api00, :cname, clus1)
+    @test size(mn)[1] == apiclus1.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 647.2667 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 41.537132 rtol = 1 # tolerance is too large
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 732.0769 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 52.336574 rtol = SE_TOL
+    # equivalent R code (results cause clutter):
+    # > svyby(~api00, ~cname, clus1rep, svytotal)
+    # > svyby(~api00, ~cname, clus1rep, svymean)
 end
\ No newline at end of file

From 734f1bbb78862d0f04c9a4c09cb60cce52c648c2 Mon Sep 17 00:00:00 2001
From: smishr <43640926+smishr@users.noreply.github.com>
Date: Tue, 10 Jan 2023 15:52:53 +0530
Subject: [PATCH 13/80] Update survey design, add tests, remove extra quantile

---
 src/SurveyDesign.jl  | 39 +++++++++++++++++++++++++--------------
 src/quantile.jl      |  9 ---------
 src/show.jl          |  8 ++++----
 test/SurveyDesign.jl | 41 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 68 insertions(+), 29 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 1541a3b9..7b68ea86 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -33,15 +33,14 @@ julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistak
 
 julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
 SurveyDesign:
-data: 183x46 DataFrame
+data: 183x44 DataFrame
 cluster: dnum
 design.data[!,design.cluster]: 637, 637, 637, ..., 448
 popsize: popsize
 design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
 sampsize: sampsize
 design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+design.data[!,design.allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
 ```
 """
 struct SurveyDesign <: AbstractSurveyDesign
@@ -50,9 +49,15 @@ struct SurveyDesign <: AbstractSurveyDesign
     popsize::Symbol
     sampsize::Symbol
     strata::Symbol
-    pps::Bool
+    weights::Symbol # Name of the effective-weights column (only the single-stage approximation is supported)
+    allprobs::Symbol # Name of the column holding 1 ./ weights (only the single-stage approximation is supported for now)
+    pps::Bool # TODO functionality
     # Single stage clusters sample, like apiclus1
-    function SurveyDesign(data::AbstractDataFrame; strata::Union{Nothing,Symbol} = nothing, weights::Union{Nothing,Symbol}= nothing, clusters::Union{Nothing, Symbol, Vector{Symbol}} = nothing, popsize::Union{Nothing, Int,Symbol}=nothing) 
+    function SurveyDesign(data::AbstractDataFrame;
+        strata::Union{Nothing,Symbol} = nothing,
+        weights::Union{Nothing,Symbol}= nothing,
+        clusters::Union{Nothing, Symbol, Vector{Symbol}} = nothing,
+        popsize::Union{Nothing, Int,Symbol}=nothing) 
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
         if typeof(strata) <:Nothing
             data.false_strata = repeat(["FALSE_STRATA"], nrow(data))
@@ -73,20 +78,26 @@ struct SurveyDesign <: AbstractSurveyDesign
         sampsize_labels = :sampsize
         data[!, sampsize_labels] = fill(length(unique(data[!, cluster])),(nrow(data),))
         if !(typeof(popsize) <: Nothing)
-            data[!, :weights] = data[!, popsize] ./ data[!, sampsize_labels]
-        elseif !(typeof(weights) <: Nothing)
-            data.weights = data[!, weights]
+            weights_labels = :weights
+            data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
+        elseif typeof(weights) <: Symbol
+            if !(typeof(data[!,weights]) <:Vector{<:Real})
+                @show typeof(data[!,weights])
+                error("weights column has to be numeric")
+            end
+            weights_labels = weights
         else
-            data.weights = repeat([1], nrow(data))
+            weights_labels = :weights
+            data[!,weights_labels] = repeat([1], nrow(data))
         end
-        data[!, :probs] = 1 ./ data[!, :weights] # Many formulae are easily defined in terms of sampling probabilties
-        data[!, :allprobs] = data[!, :probs] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
-        pps = false
+        allprobs_labels = :allprobs
+        data[!, allprobs_labels] = 1 ./ data[!, weights_labels] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
+        pps = false # for now no explicit pps support
         if !(typeof(popsize) <: Symbol)
-            data.popsize = repeat([sum(data.weights)], nrow(data))
+            data.popsize = repeat([sum(data[!,weights_labels])], nrow(data))
             popsize = :popsize
         end
-        new(data, cluster, popsize, sampsize_labels, strata, pps)
+        new(data, cluster, popsize, sampsize_labels, strata, weights_labels, allprobs_labels, pps)
     end
 end
 
diff --git a/src/quantile.jl b/src/quantile.jl
index 1cc9646c..d4e399a5 100644
--- a/src/quantile.jl
+++ b/src/quantile.jl
@@ -41,13 +41,4 @@ function quantile(var::Symbol, design::SurveyDesign, p::Union{<:Real,Vector{<:Re
     df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p))
     # TODO: Add CI and SE of the quantile
     return df
-end
-
-function quantile(var::Symbol, design::SurveyDesign, p::Union{<:Real,Vector{<:Real}}; 
-    alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
-    v = design.data[!, var]
-    probs = design.data[!, :probs]
-    df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p)) # Not sure which quantile defintion this returns
-    # TODO: Add CI and SE of the quantile
-    return df
 end
\ No newline at end of file
diff --git a/src/show.jl b/src/show.jl
index 3319e653..bb37059c 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -46,8 +46,8 @@ function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
     printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
     printinfo(io, "sampsize", string(design.sampsize); newline=true)
     printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
-    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+    # printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
+    printinfo(io, "design.data[!,design.allprobs]", makeshort(design.data.allprobs))
 end
 
 "Print information about a repliocate design."
@@ -62,8 +62,8 @@ function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
     printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
     printinfo(io, "sampsize", string(design.sampsize); newline=true)
     printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
-    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+    # printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
+    printinfo(io, "design.data[!,design.allprobs]", makeshort(design.data.allprobs))
     printstyled(io, "replicates: "; bold=true)
     println(io, design.replicates)
 end
\ No newline at end of file
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index 6851f4e8..adc927e3 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -1,4 +1,35 @@
-@testset "SurveyDesign" begin
+@testset "SurveyDesign_srs" begin
+    ##### Simple Random Sample tests
+    # Load API datasets
+    apisrs_original = load_data("apisrs")
+    apisrs_original[!, :derived_probs] = 1 ./ apisrs_original.pw
+    apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1))
+    ##############################
+    ### Basic functionality
+    ### weights as Symbol
+    apisrs = copy(apisrs_original)
+    srs_weights = SurveyDesign(apisrs, weights=:pw)
+    @test srs_weights.data[!,srs_weights.weights][1] ≈ 30.97 atol = 1e-4
+    @test srs_weights.data[!,srs_weights.weights] == 1 ./ srs_weights.data[!,srs_weights.allprobs]
+    ##############################
+    ### Weights as non-numeric error
+    apisrs = copy(apisrs_original)
+    @test_throws ErrorException SurveyDesign(apisrs, weights=:stype)
+end
+
+@testset "SurveyDesign_strat" begin
+    ### StratifiedSample tests
+    # Load API datasets
+    apistrat_original = load_data("apistrat")
+    apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw
+    apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
+
+    apistrat = copy(apistrat_original)
+    strat = SurveyDesign(apistrat, strata = :stype, weights = :pw) |> bootweights
+
+end
+
+@testset "SurveyDesign_multistage" begin
     # Load API datasets
     apiclus1_original = load_data("apiclus1")
     apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
@@ -9,4 +40,10 @@
     @test dclus1.data[!, :weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
     @test dclus1.data[!,dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
     @test dclus1.data[!,:allprobs] ≈ dclus1.data[!,:probs] atol = 1e-4
-end
\ No newline at end of file
+    
+    ##############################
+    # Load API datasets
+    nhanes = load_data("nhanes")
+    nhanes_design = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR)
+end
+

From 547725ffb6d111ff0e9ce2de2ddc88bbe2f7b98b Mon Sep 17 00:00:00 2001
From: smishr <43640926+smishr@users.noreply.github.com>
Date: Tue, 10 Jan 2023 16:07:28 +0530
Subject: [PATCH 14/80] julia formatter

---
 src/SurveyDesign.jl  | 21 +++++++++++----------
 test/SurveyDesign.jl |  6 ++++++
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 7b68ea86..69fe3b51 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -54,12 +54,13 @@ struct SurveyDesign <: AbstractSurveyDesign
     pps::Bool # TODO functionality
     # Single stage clusters sample, like apiclus1
     function SurveyDesign(data::AbstractDataFrame;
-        strata::Union{Nothing,Symbol} = nothing,
-        weights::Union{Nothing,Symbol}= nothing,
-        clusters::Union{Nothing, Symbol, Vector{Symbol}} = nothing,
-        popsize::Union{Nothing, Int,Symbol}=nothing) 
+        clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
+        strata::Union{Nothing,Symbol}=nothing,
+        popsize::Union{Nothing,Int,Symbol}=nothing,
+        weights::Union{Nothing,Symbol}=nothing
+    )
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
-        if typeof(strata) <:Nothing
+        if typeof(strata) <: Nothing
             data.false_strata = repeat(["FALSE_STRATA"], nrow(data))
             strata = :false_strata
         end
@@ -76,25 +77,25 @@ struct SurveyDesign <: AbstractSurveyDesign
         end
         # For one-stage sample only one sampsize vector
         sampsize_labels = :sampsize
-        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])),(nrow(data),))
+        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])), (nrow(data),))
         if !(typeof(popsize) <: Nothing)
             weights_labels = :weights
             data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
         elseif typeof(weights) <: Symbol
-            if !(typeof(data[!,weights]) <:Vector{<:Real})
-                @show typeof(data[!,weights])
+            if !(typeof(data[!, weights]) <: Vector{<:Real})
+                @show typeof(data[!, weights])
                 error("weights column has to be numeric")
             end
             weights_labels = weights
         else
             weights_labels = :weights
-            data[!,weights_labels] = repeat([1], nrow(data))
+            data[!, weights_labels] = repeat([1], nrow(data))
         end
         allprobs_labels = :allprobs
         data[!, allprobs_labels] = 1 ./ data[!, weights_labels] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
         pps = false # for now no explicit pps support
         if !(typeof(popsize) <: Symbol)
-            data.popsize = repeat([sum(data[!,weights_labels])], nrow(data))
+            data.popsize = repeat([sum(data[!, weights_labels])], nrow(data))
             popsize = :popsize
         end
         new(data, cluster, popsize, sampsize_labels, strata, weights_labels, allprobs_labels, pps)
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index adc927e3..90989918 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -11,10 +11,16 @@
     srs_weights = SurveyDesign(apisrs, weights=:pw)
     @test srs_weights.data[!,srs_weights.weights][1] ≈ 30.97 atol = 1e-4
     @test srs_weights.data[!,srs_weights.weights] == 1 ./ srs_weights.data[!,srs_weights.allprobs]
+    ### popsize as Symbol
+    apisrs = copy(apisrs_original)
+    srs_pop = SurveyDesign(apisrs, popsize=:fpc)
+    @test srs_pop.data[!,srs_pop.weights][1] ≈ 30.97 atol = 1e-4
+    @test srs_pop.data[!,srs_pop.weights] == 1 ./ srs_pop.data[!,srs_pop.allprobs]
     ##############################
     ### Weights as non-numeric error
     apisrs = copy(apisrs_original)
     @test_throws ErrorException SurveyDesign(apisrs, weights=:stype)
+
 end
 
 @testset "SurveyDesign_strat" begin

From 81b77e8c55fd3020585354615fa1358bb24476aa Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 16:52:42 +0530
Subject: [PATCH 15/80] Remove references to old designs, add references to new
 functions

---
 docs/src/api.md   | 29 +++++++++--------------------
 docs/src/index.md | 41 +++++------------------------------------
 src/Survey.jl     |  5 ++---
 3 files changed, 16 insertions(+), 59 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 062d379d..5431b9ae 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -7,27 +7,19 @@ Module = [Survey]
 Order = [:type, :function]
 Private = false
 ```
-Survey data can be loaded from a `DataFrame` into a survey design. The package currently supports simple random sample and stratified sample designs. 
-```@docs
-AbstractSurveyDesign
-SimpleRandomSample
-StratifiedSample
-```
 
 ```@docs
+AbstractSurveyDesign
+SurveyDesign
+ReplicateDesign
 load_data
-Survey.mean(x::Symbol, design::SimpleRandomSample)
-total(x::Symbol, design::SimpleRandomSample)
+bootweights
+mean(x::Symbol, design::ReplicateDesign)
+mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
+total(x::Symbol, design::ReplicateDesign)
+total(x::Symbol, domain::Symbol, design::ReplicateDesign)
 quantile
-```
-
-It is often required to estimate population parameters for sub-populations of interest. For example, you may have a sample of heights, but you want the average heights of males and females separately. 
-```@docs
-mean(x::Symbol, by::Symbol, design::SimpleRandomSample) 
-total(x::Symbol, by::Symbol, design::SimpleRandomSample) 
-```
-```@docs
-ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign)
+ratio(variable_num::Symbol, variable_den::Symbol, design::SurveyDesign)
 plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 hist(design::AbstractSurveyDesign, var::Symbol,
@@ -35,7 +27,4 @@ hist(design::AbstractSurveyDesign, var::Symbol,
 				 normalization = :density,
 				 kwargs...
     			)
-dim(design::AbstractSurveyDesign)
-dimnames(design::AbstractSurveyDesign)
-colnames(design::AbstractSurveyDesign)
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
index a099c73c..eddbcf0f 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -18,45 +18,14 @@ with at least 100 students and for various probability samples of the data.
 The API program has been discontinued at the end of 2018. Information is archived at
 [https://www.cde.ca.gov/re/pr/api.asp](https://www.cde.ca.gov/re/pr/api.asp)
 
-Firstly, a survey design needs a dataset from which to gather information. 
-
-
-The sample datasets provided with the package can be loaded as `DataFrames` using the `load_data` function:
+Firstly, a survey design needs a dataset from which to gather information. The sample
+datasets provided with the package can be loaded as `DataFrame`s using [`load_data`](@ref):
 
 ```julia
 julia> apisrs = load_data("apisrs");
 ```
-`apisrs` is a simple random sample of the Academic Performance Index of Californian schools.
-
-Next, we can build a design. The design corresponding to a simple random sample is [`SimpleRandomSample`](@ref), which can be instantiated by calling the constructor:
-
-```julia
-julia> srs = SimpleRandomSample(apisrs; weights = :pw)
-SimpleRandomSample:
-data: 200x42 DataFrame
-weights: 31.0, 31.0, 31.0, ..., 31.0
-probs: 0.0323, 0.0323, 0.0323, ..., 0.0323
-fpc: 6194, 6194, 6194, ..., 6194
-popsize: 6194
-sampsize: 200
-sampfraction: 0.0323
-ignorefpc: false
-```
 
-With a `SimpleRandomSample` (as well as with any subtype of [`AbstractSurveyDesign`](@ref)) it is possible to calculate estimates of the mean, population total, etc., for a given variable, along with the corresponding standard errors.
+`apisrs` is a simple random sample of the Academic Performance Index of Californian schools.
 
-```julia
-julia> mean(:api00, srs)
-1×2 DataFrame
- Row │ mean     sem     
-     │ Float64  Float64 
-─────┼──────────────────
-   1 │ 656.585  9.24972
-
-julia> total(:api00, srs)
-1×2 DataFrame
- Row │ total      se_total 
-     │ Float64    Float64  
-─────┼─────────────────────
-   1 │ 4.06689e6   57292.8
-```
+Next, we can build a design.
+#TODO: continue tutorial
diff --git a/src/Survey.jl b/src/Survey.jl
index fee13d6d..dd71a092 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -28,12 +28,11 @@ include("ratio.jl")
 include("by.jl")
 
 export load_data
-export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample
-export SurveyDesign
+export AbstractSurveyDesign, SurveyDesign, ReplicateDesign
 export dim, colnames, dimnames
 export mean, total, quantile
 export plot
-export hist
+export hist, sturges, freedman_diaconis
 export boxplot
 export bootweights
 export jkknife

From 751813822326ea0e8001b0f42314dfc5e16795d7 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 16:57:01 +0530
Subject: [PATCH 16/80] Fix docstring and minor style modifications

---
 src/bootstrap.jl | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 2055b6bf..b4e226a8 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -1,15 +1,15 @@
 """
 ```jldoctest
-julia> using Survey, Random;
+julia> using Random
 
-julia> apiclus1 = load_data("apiclus1"); 
+julia> apiclus1 = load_data("apiclus1");
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum);
 
-julia> rng = MersenneTwister(111); 
+julia> rng = MersenneTwister(111);
 
-julia> Survey.bootweights(dclus1; replicates=1000, rng) 
-Survey.ReplicateDesign:
+julia> bootweights(dclus1; replicates=1000, rng)
+ReplicateDesign:
 data: 183x1046 DataFrame
 cluster: dnum
 design.data[!,design.cluster]: 637, 637, 637, ..., 448
@@ -22,7 +22,7 @@ design.data[!,:allprobs]: 1.0, 1.0, 1.0, ..., 1.0
 replicates: 1000
 ```
 """
-function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwister(1234))
+function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(1234))
     H = length(unique(design.data[!, design.strata]))
     stratified = groupby(design.data, design.strata)
     function replicate(stratified, H)
@@ -45,10 +45,10 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
         return transform(stratified, :whij)
     end
     df = replicate(stratified, H)
-    rename!(df,:whij => :replicate_1)
+    rename!(df, :whij => :replicate_1)
     df.replicate_1 = disallowmissing(df.replicate_1)
     for i in 2:(replicates)
-        df[!, "replicate_"*string(i)] = disallowmissing(replicate(stratified, H).whij)
+        df[!, "replicate_" * string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
 end
\ No newline at end of file

From efe2f6a32db2c9fddfc61135ec7baf54aa379547 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 17:02:57 +0530
Subject: [PATCH 17/80] Convert indentation to spaces

---
 docs/src/api.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 5431b9ae..5b538a55 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -23,8 +23,8 @@ ratio(variable_num::Symbol, variable_den::Symbol, design::SurveyDesign)
 plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 hist(design::AbstractSurveyDesign, var::Symbol,
-				 bins::Union{Integer, AbstractVector} = freedman_diaconis(design, var);
-				 normalization = :density,
-				 kwargs...
-    			)
+                 bins::Union{Integer, AbstractVector} = freedman_diaconis(design, var);
+                 normalization = :density,
+                 kwargs...
+                )
 ```

From 2bac93875eaa85a5a16fb06cbb2fa513d16684a0 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 17:16:22 +0530
Subject: [PATCH 18/80] Fix docstrings, minor rearrangements and style checks

---
 src/SurveyDesign.jl |  65 ++++++++++++++------------
 src/mean.jl         | 108 ++++++++++++++++++--------------------------
 src/total.jl        |  81 ++++++++++++++++++---------------
 3 files changed, 124 insertions(+), 130 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 1541a3b9..07b9e1de 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -13,35 +13,32 @@ abstract type AbstractSurveyDesign end
 """
     SurveyDesign <: AbstractSurveyDesign
 
-Survey design sampled by one stage clusters sampling.
-Clusters chosen by SRS followed by complete sampling of selected clusters.
-Assumes each individual in one and only one clusters; disjoint and nested clusters.
+General survey design encompassing a simple random, stratified, cluster or multi-stage design.
 
-`clusters` must be specified as a Symbol name of a column in `data`.
+In the case of a cluster sample, the clusters are chosen by simple random sampling. All
+individuals in one cluster are sampled. The clusters are considered disjoint and nested.
 
 # Arguments:
 `data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`clusters::Symbol`: the stratification variable - must be given as a column in `data`.
-`popsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=nothing`: the (expected) survey population size. For 
-
-`weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing`: the sampling weights.
+`strata::Union{Nothing, Symbol}=nothing`: the stratification variable - must be given as a column in `data`.
+`clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable - must be given as column(s) in `data`.
+`weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
+`popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
 
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
-
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
+julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
 SurveyDesign:
 data: 183x46 DataFrame
 cluster: dnum
 design.data[!,design.cluster]: 637, 637, 637, ..., 448
 popsize: popsize
-design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
+design.data[!,design.popsize]: 6190.0, 6190.0, 6190.0, ..., 6190.0
 sampsize: sampsize
 design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+design.data[!,:probs]: 0.0295, 0.0295, 0.0295, ..., 0.0295
+design.data[!,:allprobs]: 0.0295, 0.0295, 0.0295, ..., 0.0295
 ```
 """
 struct SurveyDesign <: AbstractSurveyDesign
@@ -52,9 +49,15 @@ struct SurveyDesign <: AbstractSurveyDesign
     strata::Symbol
     pps::Bool
     # Single stage clusters sample, like apiclus1
-    function SurveyDesign(data::AbstractDataFrame; strata::Union{Nothing,Symbol} = nothing, weights::Union{Nothing,Symbol}= nothing, clusters::Union{Nothing, Symbol, Vector{Symbol}} = nothing, popsize::Union{Nothing, Int,Symbol}=nothing) 
+    function SurveyDesign(
+		data::AbstractDataFrame;
+        strata::Union{Nothing, Symbol}=nothing,
+        clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing,
+        weights::Union{Nothing, Symbol}=nothing,
+        popsize::Union{Nothing, Int, Symbol}=nothing
+    )
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
-        if typeof(strata) <:Nothing
+        if typeof(strata) <: Nothing
             data.false_strata = repeat(["FALSE_STRATA"], nrow(data))
             strata = :false_strata
         end
@@ -71,7 +74,7 @@ struct SurveyDesign <: AbstractSurveyDesign
         end
         # For one-stage sample only one sampsize vector
         sampsize_labels = :sampsize
-        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])),(nrow(data),))
+        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])), (nrow(data),))
         if !(typeof(popsize) <: Nothing)
             data[!, :weights] = data[!, popsize] ./ data[!, sampsize_labels]
         elseif !(typeof(weights) <: Nothing)
@@ -91,24 +94,26 @@ struct SurveyDesign <: AbstractSurveyDesign
 end
 
 """
-```jldoctest
-julia> apiclus1 = load_data("apiclus1");
+    ReplicateDesign <: AbstractSurveyDesign
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
+Survey design obtained by replicating an original design using [`bootweights`](@ref).
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apistrat = load_data("apistrat");
 
-julia> bclus1 = Survey.bootweights(dclus1; replicates = 1000)
-Survey.ReplicateDesign:
-data: 183x1046 DataFrame
-cluster: dnum
-design.data[!,design.cluster]: 637, 637, 637, ..., 448
+julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
+
+julia> bootstrat = bootweights(strat; replicates=1000)
+ReplicateDesign:
+data: 200x1046 DataFrame
+cluster: false_cluster
+design.data[!,design.cluster]: 1, 2, 3, ..., 200
 popsize: popsize
-design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
+design.data[!,design.popsize]: 6190.0, 6190.0, 6190.0, ..., 6190.0
 sampsize: sampsize
-design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+design.data[!,design.sampsize]: 200, 200, 200, ..., 200
+design.data[!,:probs]: 0.0226, 0.0226, 0.0226, ..., 0.0662
+design.data[!,:allprobs]: 0.0226, 0.0226, 0.0226, ..., 0.0662
 replicates: 1000
 ```
 """
diff --git a/src/mean.jl b/src/mean.jl
index 2bf8b925..0ef5bb37 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -1,19 +1,27 @@
 """
-```jldoctest
-julia> using Survey, Random, StatsBase; 
+    mean(var, design)
 
-julia> apiclus1 = load_data("apiclus1"); 
+Compute the estimated mean of one or more variables within a survey design.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000)
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> mean(:api00, bclus1)
+julia> mean(:api00, clus1)
 1×2 DataFrame
- Row │ mean     SE      
-     │ Float64  Float64 
+ Row │ mean     SE
+     │ Float64  Float64
 ─────┼──────────────────
-   1 │ 644.169  23.7208
+   1 │ 644.169  23.2919
+
+julia> mean([:api00, :enroll], clus1)
+2×3 DataFrame
+ Row │ names   mean     SE
+     │ String  Float64  Float64
+─────┼──────────────────────────
+   1 │ api00   644.169  23.2919
+   2 │ enroll  549.716  45.3655
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
@@ -22,59 +30,39 @@ function mean(x::Symbol, design::ReplicateDesign)
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(mean = X, SE = sqrt(variance))
 end
+
+function mean(x::Vector{Symbol}, design::ReplicateDesign)
+    df = reduce(vcat, [mean(i, design) for i in x])
+    insertcols!(df, 1, :names => String.(x))
+    return df
+end
+
 """
-```jldoctest
-julia> using Survey, Random, StatsBase; 
+    mean(var, domain, design)
 
-julia> apiclus1 = load_data("apiclus1"); 
+Compute the estimated mean within a domain.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000)
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> mean(:api00, :cname, bclus1) |> print
-38×3 DataFrame
- Row │ cname            statistic  SE          
-     │ String15         Float64    Any         
-─────┼─────────────────────────────────────────
-   1 │ Kern               573.6    44.5578
-   2 │ Los Angeles        658.156  22.2058
-   3 │ Orange             749.333  29.5701
-   4 │ San Luis Obispo    739.0    3.37273e-14
-   5 │ San Francisco      558.333  45.6266
-   6 │ Modoc              671.0    0.0
-   7 │ Alameda            676.091  37.3104
-   8 │ Solano             623.0    45.1222
-   9 │ Santa Cruz         624.333  113.43
-  10 │ Monterey           605.0    85.4116
-  11 │ San Bernardino     614.462  30.0066
-  12 │ Riverside          574.3    27.2025
-  13 │ Tulare             664.0    22.0097
-  14 │ San Diego          684.5    32.2241
-  15 │ Sacramento         616.0    39.7877
-  16 │ Marin              799.667  35.2397
-  17 │ Imperial           622.0    0.0
-  18 │ Ventura            743.8    31.7425
-  19 │ San Joaquin        608.667  40.8592
-  20 │ Sonoma             630.0    0.0
-  21 │ Fresno             600.25   56.9173
-  22 │ Santa Clara        718.286  58.562
-  23 │ Sutter             744.0    0.0
-  24 │ Contra Costa       766.111  53.598
-  25 │ Stanislaus         736.333  5.26576
-  26 │ Madera             480.0    3.5861
-  27 │ Placer             759.0    0.0
-  28 │ Lassen             752.0    0.0
-  29 │ Santa Barbara      728.667  25.8749
-  30 │ San Mateo          617.0    78.1173
-  31 │ Siskiyou           699.0    0.0
-  32 │ Kings              469.5    44.6284
-  33 │ Shasta             754.0    60.5829
-  34 │ Yolo               475.0    0.0
-  35 │ Calaveras          790.0    0.0
-  36 │ Napa               727.0    50.5542
-  37 │ Lake               804.0    0.0
-  38 │ Merced             595.0    0
+julia> mean(:api00, :cname, clus1)
+11×3 DataFrame
+ Row │ cname        mean     SE
+     │ String15     Float64  Any
+─────┼───────────────────────────────────
+   1 │ Alameda      669.0    1.27388e-13
+   2 │ Fresno       472.0    1.13687e-13
+   3 │ Kern         452.5    0.0
+   4 │ Los Angeles  647.267  47.4938
+   5 │ Mendocino    623.25   1.0931e-13
+   6 │ Merced       519.25   4.57038e-15
+   7 │ Orange       710.563  2.19684e-13
+   8 │ Plumas       709.556  1.27773e-13
+   9 │ San Diego    659.436  2.63446
+  10 │ San Joaquin  551.189  2.17471e-13
+  11 │ Santa Clara  732.077  56.2584
 ```
 """
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
@@ -82,10 +70,4 @@ function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
     df = bydomain(x, domain, design, weighted_mean)
     rename!(df, :statistic => :mean)
     return df
-end
-
-function mean(x::Vector{Symbol}, design::ReplicateDesign)
-    df = reduce(vcat, [mean(i, design) for i in x])
-    insertcols!(df, 1, :names => String.(x))
-    return df
 end
\ No newline at end of file
diff --git a/src/total.jl b/src/total.jl
index fdf83216..e5fbbdcb 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -1,21 +1,27 @@
 """
-```jldoctest
-julia> using Survey; 
-
-julia> apiclus1 = load_data("apiclus1"); 
+    total(var, design)
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
+Compute the estimated population total for one or more variables within a survey design.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000); 
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> total(:api00, bclus1)
+julia> total(:api00, clus1)
 1×2 DataFrame
- Row │ total      SE        
-     │ Float64    Float64   
+ Row │ total      SE
+     │ Float64    Float64
 ─────┼──────────────────────
-   1 │ 5.94916e6  1.31977e6
+   1 │ 3.98999e6  9.22175e5
+
+julia> total([:api00, :enroll], clus1)
+2×3 DataFrame
+ Row │ names   total      SE
+     │ String  Float64    Float64
+─────┼──────────────────────────────
+   1 │ api00   3.98999e6  9.22175e5
+   2 │ enroll  3.40494e6  9.51557e5
 ```
 """
 function total(x::Symbol, design::ReplicateDesign)
@@ -24,41 +30,42 @@ function total(x::Symbol, design::ReplicateDesign)
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(total = X, SE = sqrt(variance))
 end
+
+function total(x::Vector{Symbol}, design::ReplicateDesign)
+    df = reduce(vcat, [total(i, design) for i in x])
+    insertcols!(df, 1, :names => String.(x))
+    return df
+end
+
 """
-```jldoctest
-julia> using Survey; 
+    total(var, domain, design)
 
-julia> apiclus1 = load_data("apiclus1"); 
+Compute the estimated population total within a domain.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000); 
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> total(:api00, :cname, bclus1) |> print
+julia> total(:api00, :cname, clus1)
 11×3 DataFrame
- Row │ cname        statistic      SE        
-     │ String15     Float64        Any       
-─────┼───────────────────────────────────────
-   1 │ Alameda          3.71384e5  3.78375e5
-   2 │ Fresno       95281.1        96134.8
-   3 │ Kern         45672.3        43544.7
-   4 │ Los Angeles      4.89981e5  4.42865e5
-   5 │ Mendocino        1.25813e5  1.22757e5
-   6 │ Merced           1.04819e5  1.09032e5
-   7 │ Orange           5.73756e5  6.01213e5
-   8 │ Plumas           3.2228e5   3.26443e5
-   9 │ San Diego        1.83038e6  1.34155e6
-  10 │ San Joaquin      1.02922e6  1.04048e6
-  11 │ Santa Clara      9.60583e5  643492.0
+ Row │ cname        total           SE
+     │ String15     Float64         Any
+─────┼────────────────────────────────────────
+   1 │ Alameda      249080.0        2.48842e5
+   2 │ Fresno        63903.1        64452.2
+   3 │ Kern          30631.5        31083.0
+   4 │ Los Angeles       3.2862e5   2.93649e5
+   5 │ Mendocino     84380.6        83154.4
+   6 │ Merced        70300.2        69272.5
+   7 │ Orange            3.84807e5  3.90097e5
+   8 │ Plumas            2.16147e5  2.17811e5
+   9 │ San Diego         1.2276e6   8.78559e5
+  10 │ San Joaquin       6.90276e5  6.90685e5
+  11 │ Santa Clara       6.44244e5  4.09943e5
 ```
 """
 function total(x::Symbol, domain::Symbol, design::ReplicateDesign)
     df = bydomain(x, domain, design, wsum)
     rename!(df, :statistic => :total)
-end
-
-function total(x::Vector{Symbol}, design::ReplicateDesign)
-    df = reduce(vcat, [total(i, design) for i in x])
-    insertcols!(df, 1, :names => String.(x))
-    return df
 end
\ No newline at end of file

From 76275cd66b93f0b5794d8a8638023c2e2857da01 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 17:26:11 +0530
Subject: [PATCH 19/80] Fix argument rendering in docstring

---
 src/SurveyDesign.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 07b9e1de..69657cd0 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -18,12 +18,14 @@ General survey design encompassing a simple random, stratified, cluster or multi
 In the case of cluster sample, the clusters are chosen by simple random sampling. All
 individuals in one cluster are sampled. The clusters are considered disjoint and nested.
 
+`strata` and `clusters` must be given as columns in `data`.
+
 # Arguments:
-`data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`strata::Union{Nothing, Symbol}=nothing`: the stratification variable - must be given as a column in `data`.
-`clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable - must be given as column(s) in `data`.
-`weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
-`popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
+- `data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
+- `strata::Union{Nothing, Symbol}=nothing`: the stratification variable.
+- `clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable.
+- `weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
+- `popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
 
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");

From b63acd1498ef386194c928a0225f72660bdad54a Mon Sep 17 00:00:00 2001
From: smishr <43640926+smishr@users.noreply.github.com>
Date: Tue, 10 Jan 2023 18:05:03 +0530
Subject: [PATCH 20/80] Add tests for stratified sampling SurveyDesign

---
 src/SurveyDesign.jl  |  3 +--
 test/SurveyDesign.jl | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 69fe3b51..0b53fb57 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -83,7 +83,6 @@ struct SurveyDesign <: AbstractSurveyDesign
             data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
         elseif typeof(weights) <: Symbol
             if !(typeof(data[!, weights]) <: Vector{<:Real})
-                @show typeof(data[!, weights])
                 error("weights column has to be numeric")
             end
             weights_labels = weights
@@ -95,8 +94,8 @@ struct SurveyDesign <: AbstractSurveyDesign
         data[!, allprobs_labels] = 1 ./ data[!, weights_labels] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
         pps = false # for now no explicit pps support
         if !(typeof(popsize) <: Symbol)
-            data.popsize = repeat([sum(data[!, weights_labels])], nrow(data))
             popsize = :popsize
+            data[!,popsize] = repeat([sum(data[!, weights_labels])], nrow(data))
         end
         new(data, cluster, popsize, sampsize_labels, strata, weights_labels, allprobs_labels, pps)
     end
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index 90989918..01d5baf5 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -16,11 +16,12 @@
     srs_pop = SurveyDesign(apisrs, popsize=:fpc)
     @test srs_pop.data[!,srs_pop.weights][1] ≈ 30.97 atol = 1e-4
     @test srs_pop.data[!,srs_pop.weights] == 1 ./ srs_pop.data[!,srs_pop.allprobs]
+    ### Both ways should achieve same weights and allprobs!
+    @test srs_pop.data[!,srs_pop.weights] == srs_weights.data[!,srs_weights.weights]
     ##############################
     ### Weights as non-numeric error
     apisrs = copy(apisrs_original)
     @test_throws ErrorException SurveyDesign(apisrs, weights=:stype)
-
 end
 
 @testset "SurveyDesign_strat" begin
@@ -29,10 +30,17 @@ end
     apistrat_original = load_data("apistrat")
     apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw
     apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
-
+    ##############################
+    ### weights as Symbol
     apistrat = copy(apistrat_original)
-    strat = SurveyDesign(apistrat, strata = :stype, weights = :pw) |> bootweights
-
+    strat_wt = SurveyDesign(apistrat, strata=:stype, weights=:pw)
+    @test strat_wt.data[!,strat_wt.weights] == 1 ./ strat_wt.data[!,strat_wt.allprobs]
+    ### popsize as Symbol
+    apistrat = copy(apistrat_original)
+    strat_pop = SurveyDesign(apistrat, strata=:stype, popsize=:fpc)
+    @test strat_pop.data[!,strat_pop.weights] == 1 ./ strat_pop.data[!,strat_pop.allprobs]
+    ##############################
+    # @test strat_pop.data[!,strat_pop.weights] == strat_wt.data[!,strat_wt.weights]
 end
 
 @testset "SurveyDesign_multistage" begin

From e371402ff4b3c95f275e511f29c596095d56db0c Mon Sep 17 00:00:00 2001
From: smishr <43640926+smishr@users.noreply.github.com>
Date: Wed, 11 Jan 2023 13:23:37 +0530
Subject: [PATCH 21/80] nhanes and yrbs testing

---
 src/SurveyDesign.jl  |  4 ++--
 test/SurveyDesign.jl | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index b1d26204..81e99767 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -55,7 +55,7 @@ struct SurveyDesign <: AbstractSurveyDesign
     function SurveyDesign(data::AbstractDataFrame;
         clusters::Union{Nothing,Symbol,Vector{Symbol}}=nothing,
         strata::Union{Nothing,Symbol}=nothing,
-        popsize::Union{Nothing,Int,Symbol}=nothing,
+        popsize::Union{Nothing,Symbol}=nothing,
         weights::Union{Nothing,Symbol}=nothing
     )
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
@@ -82,7 +82,7 @@ struct SurveyDesign <: AbstractSurveyDesign
             data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
         elseif typeof(weights) <: Symbol
             if !(typeof(data[!, weights]) <: Vector{<:Real})
-                error("weights column has to be numeric")
+                error(string("given weights column ", weights , " is not of numeric type"))
             end
             weights_labels = weights
         else
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index 01d5baf5..be515926 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -61,3 +61,16 @@ end
     nhanes_design = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR)
 end
 
+@testset "SurveyDesign_realSurveys" begin
+    # Load API datasets
+    yrbs_original = load_data("yrbs")
+    nhanes_original = load_data("nhanes")
+    ##############################
+    # NHANES
+    nhanes = copy(nhanes_original)
+    dnhanes = SingleStageSurveyDesign(nhanes; cluster = :SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR)
+    ##############################
+    # YRBS
+    yrbs = copy(yrbs_original)
+    dyrbs = SingleStageSurveyDesign(yrbs; cluster = :psu, strata=:stratum, weights=:weight)
+end

From ae0cb2aa8bc87b7d318d97046f5f8de8d58a1be9 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 19:02:01 +0530
Subject: [PATCH 22/80] Change show for `SurveyDesign`

---
 src/show.jl | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 3319e653..5dd5b21a 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -6,16 +6,20 @@ function makeshort(x)
         x = round.(x, sigdigits=3)
     end
     # print short vectors or single values as they are, compress otherwise
-    x = length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * ", ..., " * string(last(x))
+    if length(x) > 1
+        return "[" * (length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * "  …  " * string(last(x))) * "]"
+    end
+
+    return x
 end
 
 """
 Print information in the form:
     **name:** content[\n]
 """
-function printinfo(io::IO, name::String, content::String; newline::Bool=true)
+function printinfo(io::IO, name::String, content, args...; newline::Bool=true)
     printstyled(io, name, ": "; bold=true)
-    newline ? println(io, content) : print(io, content)
+    newline ? println(io, content, args...) : print(io, content, args...)
 end
 
 "Print information about a survey design."
@@ -33,24 +37,37 @@ function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
     printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
 end
 
-
 "Print information about a survey design."
-function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
+Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
+    surveyshow(IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50)), design)
+
+function surveyshow(io::IO, design::SurveyDesign)
+    # structure name
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "cluster", string(design.cluster); newline=true)
-    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
-    printinfo(io, "popsize", string(design.popsize); newline=true)
-    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
-    printinfo(io, "sampsize", string(design.sampsize); newline=true)
-    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
-    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+    # data info
+    printinfo(io, "data", summary(design.data))
+    # strata info
+    strata_content =
+        design.strata == :false_strata ?
+            "none" :
+            (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
+    printinfo(io, "strata", strata_content...)
+    # cluster(s) info
+    cluster_content =
+        design.cluster == :false_cluster ?
+            "none" :
+            (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
+    printinfo(io, "cluster", cluster_content...)
+    # popsize and sampsize info
+    printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
+    printinfo(io, "sampsize", "\n    ", makeshort(design.data[!, design.sampsize]))
+    # weights and probs info
+    printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
+    printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
 end
 
-"Print information about a repliocate design."
+"Print information about a replicate design."
 function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)

From b92a8d8ed026131248e586595e06341d50af7ff7 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 19:30:34 +0530
Subject: [PATCH 23/80] Change show for `AbstractSurveyDesign` and
 `ReplicateDesign`, restructure code

---
 src/show.jl | 60 ++++++++++++++++-------------------------------------
 1 file changed, 18 insertions(+), 42 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 5dd5b21a..2af3d6d1 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -1,3 +1,5 @@
+surveyio(io) = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
+
 """
 Helper function that transforms a given `Number` or `Vector` into a short-form string.
 """
@@ -23,41 +25,33 @@ function printinfo(io::IO, name::String, content, args...; newline::Bool=true)
 end
 
 "Print information about a survey design."
-function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
-    type = typeof(design)
-    printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "weights", makeshort(design.data.weights))
-    printinfo(io, "probs", makeshort(design.data.probs))
-    printinfo(io, "fpc", makeshort(design.data.fpc))
-    printinfo(io, "popsize", makeshort(design.popsize))
-    printinfo(io, "sampsize", makeshort(design.sampsize))
-    printinfo(io, "sampfraction", makeshort(design.sampfraction))
-    printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
-end
+Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign) =
+    surveyshow(surveyio(io), design)
 
-"Print information about a survey design."
 Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
-    surveyshow(IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50)), design)
+    surveyshow(surveyio(io), design)
+
+function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
+    # new_io = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
+    surveyshow(surveyio(io), design)
+    printinfo(surveyio(io), "\nreplicates", design.replicates; newline=false)
+end
 
-function surveyshow(io::IO, design::SurveyDesign)
+function surveyshow(io::IO, design::AbstractSurveyDesign)
     # structure name
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
     # data info
     printinfo(io, "data", summary(design.data))
     # strata info
-    strata_content =
-        design.strata == :false_strata ?
-            "none" :
-            (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
+    strata_content = design.strata == :false_strata ?
+                     "none" :
+                     (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
     printinfo(io, "strata", strata_content...)
     # cluster(s) info
-    cluster_content =
-        design.cluster == :false_cluster ?
-            "none" :
-            (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
+    cluster_content = design.cluster == :false_cluster ?
+                      "none" :
+                      (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
     printinfo(io, "cluster", cluster_content...)
     # popsize and sampsize info
     printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
@@ -66,21 +60,3 @@ function surveyshow(io::IO, design::SurveyDesign)
     printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
     printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
 end
-
-"Print information about a replicate design."
-function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
-    type = typeof(design)
-    printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "cluster", string(design.cluster); newline=true)
-    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
-    printinfo(io, "popsize", string(design.popsize); newline=true)
-    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
-    printinfo(io, "sampsize", string(design.sampsize); newline=true)
-    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
-    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
-    printstyled(io, "replicates: "; bold=true)
-    println(io, design.replicates)
-end
\ No newline at end of file

From aaeebb1c35685f64548caaae9349bb1f0b10299e Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 19:37:14 +0530
Subject: [PATCH 24/80] Revert "Change show for `AbstractSurveyDesign` and
 `ReplicateDesign`, restructure code"

This reverts commit b92a8d8ed026131248e586595e06341d50af7ff7.
---
 src/show.jl | 60 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 2af3d6d1..5dd5b21a 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -1,5 +1,3 @@
-surveyio(io) = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
-
 """
 Helper function that transforms a given `Number` or `Vector` into a short-form string.
 """
@@ -25,33 +23,41 @@ function printinfo(io::IO, name::String, content, args...; newline::Bool=true)
 end
 
 "Print information about a survey design."
-Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign) =
-    surveyshow(surveyio(io), design)
+function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
+    type = typeof(design)
+    printstyled(io, "$type:\n"; bold=true)
+    printstyled(io, "data: "; bold=true)
+    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
+    printinfo(io, "weights", makeshort(design.data.weights))
+    printinfo(io, "probs", makeshort(design.data.probs))
+    printinfo(io, "fpc", makeshort(design.data.fpc))
+    printinfo(io, "popsize", makeshort(design.popsize))
+    printinfo(io, "sampsize", makeshort(design.sampsize))
+    printinfo(io, "sampfraction", makeshort(design.sampfraction))
+    printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
+end
 
+"Print information about a survey design."
 Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
-    surveyshow(surveyio(io), design)
-
-function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
-    # new_io = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
-    surveyshow(surveyio(io), design)
-    printinfo(surveyio(io), "\nreplicates", design.replicates; newline=false)
-end
+    surveyshow(IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50)), design)
 
-function surveyshow(io::IO, design::AbstractSurveyDesign)
+function surveyshow(io::IO, design::SurveyDesign)
     # structure name
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
     # data info
     printinfo(io, "data", summary(design.data))
     # strata info
-    strata_content = design.strata == :false_strata ?
-                     "none" :
-                     (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
+    strata_content =
+        design.strata == :false_strata ?
+            "none" :
+            (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
     printinfo(io, "strata", strata_content...)
     # cluster(s) info
-    cluster_content = design.cluster == :false_cluster ?
-                      "none" :
-                      (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
+    cluster_content =
+        design.cluster == :false_cluster ?
+            "none" :
+            (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
     printinfo(io, "cluster", cluster_content...)
     # popsize and sampsize info
     printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
@@ -60,3 +66,21 @@ function surveyshow(io::IO, design::AbstractSurveyDesign)
     printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
     printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
 end
+
+"Print information about a replicate design."
+function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
+    type = typeof(design)
+    printstyled(io, "$type:\n"; bold=true)
+    printstyled(io, "data: "; bold=true)
+    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
+    printinfo(io, "cluster", string(design.cluster); newline=true)
+    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
+    printinfo(io, "popsize", string(design.popsize); newline=true)
+    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
+    printinfo(io, "sampsize", string(design.sampsize); newline=true)
+    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
+    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
+    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+    printstyled(io, "replicates: "; bold=true)
+    println(io, design.replicates)
+end
\ No newline at end of file

From 15e34ece548cc990bfeca4683150ee2350032705 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 19:39:24 +0530
Subject: [PATCH 25/80] Revert "Change show for `SurveyDesign`"

This reverts commit ae0cb2aa8bc87b7d318d97046f5f8de8d58a1be9.
---
 src/show.jl | 49 ++++++++++++++++---------------------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 5dd5b21a..3319e653 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -6,20 +6,16 @@ function makeshort(x)
         x = round.(x, sigdigits=3)
     end
     # print short vectors or single values as they are, compress otherwise
-    if length(x) > 1
-        return "[" * (length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * "  …  " * string(last(x))) * "]"
-    end
-
-    return x
+    x = length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * ", ..., " * string(last(x))
 end
 
 """
 Print information in the form:
     **name:** content[\n]
 """
-function printinfo(io::IO, name::String, content, args...; newline::Bool=true)
+function printinfo(io::IO, name::String, content::String; newline::Bool=true)
     printstyled(io, name, ": "; bold=true)
-    newline ? println(io, content, args...) : print(io, content, args...)
+    newline ? println(io, content) : print(io, content)
 end
 
 "Print information about a survey design."
@@ -37,37 +33,24 @@ function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
     printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
 end
 
-"Print information about a survey design."
-Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
-    surveyshow(IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50)), design)
 
-function surveyshow(io::IO, design::SurveyDesign)
-    # structure name
+"Print information about a survey design."
+function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
-    # data info
-    printinfo(io, "data", summary(design.data))
-    # strata info
-    strata_content =
-        design.strata == :false_strata ?
-            "none" :
-            (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
-    printinfo(io, "strata", strata_content...)
-    # cluster(s) info
-    cluster_content =
-        design.cluster == :false_cluster ?
-            "none" :
-            (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
-    printinfo(io, "cluster", cluster_content...)
-    # popsize and sampsize info
-    printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
-    printinfo(io, "sampsize", "\n    ", makeshort(design.data[!, design.sampsize]))
-    # weights and probs info
-    printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
-    printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
+    printstyled(io, "data: "; bold=true)
+    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
+    printinfo(io, "cluster", string(design.cluster); newline=true)
+    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
+    printinfo(io, "popsize", string(design.popsize); newline=true)
+    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
+    printinfo(io, "sampsize", string(design.sampsize); newline=true)
+    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
+    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
+    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
 end
 
-"Print information about a replicate design."
+"Print information about a repliocate design."
 function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)

From f38db7da9625ca621fbb1aeea78d1ae93439902a Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 19:42:41 +0530
Subject: [PATCH 26/80] Revert "Revert "Change show for `SurveyDesign`""

This reverts commit 15e34ece548cc990bfeca4683150ee2350032705.
---
 src/show.jl | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 3319e653..5dd5b21a 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -6,16 +6,20 @@ function makeshort(x)
         x = round.(x, sigdigits=3)
     end
     # print short vectors or single values as they are, compress otherwise
-    x = length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * ", ..., " * string(last(x))
+    if length(x) > 1
+        return "[" * (length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * "  …  " * string(last(x))) * "]"
+    end
+
+    return x
 end
 
 """
 Print information in the form:
     **name:** content[\n]
 """
-function printinfo(io::IO, name::String, content::String; newline::Bool=true)
+function printinfo(io::IO, name::String, content, args...; newline::Bool=true)
     printstyled(io, name, ": "; bold=true)
-    newline ? println(io, content) : print(io, content)
+    newline ? println(io, content, args...) : print(io, content, args...)
 end
 
 "Print information about a survey design."
@@ -33,24 +37,37 @@ function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
     printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
 end
 
-
 "Print information about a survey design."
-function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign)
+Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
+    surveyshow(IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50)), design)
+
+function surveyshow(io::IO, design::SurveyDesign)
+    # structure name
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "cluster", string(design.cluster); newline=true)
-    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
-    printinfo(io, "popsize", string(design.popsize); newline=true)
-    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
-    printinfo(io, "sampsize", string(design.sampsize); newline=true)
-    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
-    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
+    # data info
+    printinfo(io, "data", summary(design.data))
+    # strata info
+    strata_content =
+        design.strata == :false_strata ?
+            "none" :
+            (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
+    printinfo(io, "strata", strata_content...)
+    # cluster(s) info
+    cluster_content =
+        design.cluster == :false_cluster ?
+            "none" :
+            (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
+    printinfo(io, "cluster", cluster_content...)
+    # popsize and sampsize info
+    printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
+    printinfo(io, "sampsize", "\n    ", makeshort(design.data[!, design.sampsize]))
+    # weights and probs info
+    printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
+    printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
 end
 
-"Print information about a repliocate design."
+"Print information about a replicate design."
 function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)

From 50d0e5afce695f3bcb9c92050015965e294dbde7 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 19:42:54 +0530
Subject: [PATCH 27/80] Revert "Revert "Change show for `AbstractSurveyDesign`
 and `ReplicateDesign`, restructure code""

This reverts commit aaeebb1c35685f64548caaae9349bb1f0b10299e.
---
 src/show.jl | 60 ++++++++++++++++-------------------------------------
 1 file changed, 18 insertions(+), 42 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 5dd5b21a..2af3d6d1 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -1,3 +1,5 @@
+surveyio(io) = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
+
 """
 Helper function that transforms a given `Number` or `Vector` into a short-form string.
 """
@@ -23,41 +25,33 @@ function printinfo(io::IO, name::String, content, args...; newline::Bool=true)
 end
 
 "Print information about a survey design."
-function Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign)
-    type = typeof(design)
-    printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "weights", makeshort(design.data.weights))
-    printinfo(io, "probs", makeshort(design.data.probs))
-    printinfo(io, "fpc", makeshort(design.data.fpc))
-    printinfo(io, "popsize", makeshort(design.popsize))
-    printinfo(io, "sampsize", makeshort(design.sampsize))
-    printinfo(io, "sampfraction", makeshort(design.sampfraction))
-    printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false)
-end
+Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign) =
+    surveyshow(surveyio(io), design)
 
-"Print information about a survey design."
 Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
-    surveyshow(IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50)), design)
+    surveyshow(surveyio(io), design)
+
+function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
+    # new_io = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
+    surveyshow(surveyio(io), design)
+    printinfo(surveyio(io), "\nreplicates", design.replicates; newline=false)
+end
 
-function surveyshow(io::IO, design::SurveyDesign)
+function surveyshow(io::IO, design::AbstractSurveyDesign)
     # structure name
     type = typeof(design)
     printstyled(io, "$type:\n"; bold=true)
     # data info
     printinfo(io, "data", summary(design.data))
     # strata info
-    strata_content =
-        design.strata == :false_strata ?
-            "none" :
-            (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
+    strata_content = design.strata == :false_strata ?
+                     "none" :
+                     (string(design.strata), "\n    ", makeshort(design.data[!, design.strata]))
     printinfo(io, "strata", strata_content...)
     # cluster(s) info
-    cluster_content =
-        design.cluster == :false_cluster ?
-            "none" :
-            (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
+    cluster_content = design.cluster == :false_cluster ?
+                      "none" :
+                      (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
     printinfo(io, "cluster", cluster_content...)
     # popsize and sampsize info
     printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
@@ -66,21 +60,3 @@ function surveyshow(io::IO, design::SurveyDesign)
     printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
     printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
 end
-
-"Print information about a replicate design."
-function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
-    type = typeof(design)
-    printstyled(io, "$type:\n"; bold=true)
-    printstyled(io, "data: "; bold=true)
-    println(io, size(design.data, 1), "x", size(design.data, 2), " DataFrame")
-    printinfo(io, "cluster", string(design.cluster); newline=true)
-    printinfo(io, "design.data[!,design.cluster]", makeshort(design.data[!,design.cluster]))
-    printinfo(io, "popsize", string(design.popsize); newline=true)
-    printinfo(io, "design.data[!,design.popsize]", makeshort(design.data[!,design.popsize]))
-    printinfo(io, "sampsize", string(design.sampsize); newline=true)
-    printinfo(io, "design.data[!,design.sampsize]", makeshort(design.data[!,design.sampsize]))
-    printinfo(io, "design.data[!,:probs]", makeshort(design.data.probs))
-    printinfo(io, "design.data[!,:allprobs]", makeshort(design.data.allprobs))
-    printstyled(io, "replicates: "; bold=true)
-    println(io, design.replicates)
-end
\ No newline at end of file

From 16b04448b7f673a3d7ce528911edd696e10b13bf Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Wed, 11 Jan 2023 21:09:15 +0530
Subject: [PATCH 28/80] Change `surveyio(io)` to `io`

---
 src/show.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 2af3d6d1..90782c5b 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -26,15 +26,15 @@ end
 
 "Print information about a survey design."
 Base.show(io::IO, ::MIME"text/plain", design::AbstractSurveyDesign) =
-    surveyshow(surveyio(io), design)
+    surveyshow(io, design)
 
 Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) = 
-    surveyshow(surveyio(io), design)
+    surveyshow(io, design)
 
 function Base.show(io::IO, ::MIME"text/plain", design::ReplicateDesign)
     # new_io = IOContext(io, :compact=>true, :limit=>true, :displaysize=>(50, 50))
-    surveyshow(surveyio(io), design)
-    printinfo(surveyio(io), "\nreplicates", design.replicates; newline=false)
+    surveyshow(io, design)
+    printinfo(io, "\nreplicates", design.replicates; newline=false)
 end
 
 function surveyshow(io::IO, design::AbstractSurveyDesign)

From 5e102c7a0e2abc7e9ffe9c9cb6e6a16000a681b8 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Thu, 12 Jan 2023 10:44:52 +0530
Subject: [PATCH 29/80] Remove new line after `popsize`, `sampsize` and weights

---
 src/show.jl | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/show.jl b/src/show.jl
index 90782c5b..a72c3250 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -11,7 +11,6 @@ function makeshort(x)
     if length(x) > 1
         return "[" * (length(x) < 3 ? join(x, ", ") : join(x[1:3], ", ") * "  …  " * string(last(x))) * "]"
     end
-
     return x
 end
 
@@ -54,9 +53,9 @@ function surveyshow(io::IO, design::AbstractSurveyDesign)
                       (string(design.cluster), "\n    ", makeshort(design.data[!, design.cluster]))
     printinfo(io, "cluster", cluster_content...)
     # popsize and sampsize info
-    printinfo(io, "popsize", "\n    ", makeshort(design.data[!, design.popsize]))
-    printinfo(io, "sampsize", "\n    ", makeshort(design.data[!, design.sampsize]))
+    printinfo(io, "popsize", makeshort(design.data[!, design.popsize]))
+    printinfo(io, "sampsize", makeshort(design.data[!, design.sampsize]))
     # weights and probs info
-    printinfo(io, "weights", "\n    ", makeshort(design.data[!, :weights]))
-    printinfo(io, "probs", "\n    ", makeshort(design.data[!, :probs]); newline=false)
+    printinfo(io, "weights", makeshort(design.data[!, :weights]))
+    printinfo(io, "probs", makeshort(design.data[!, :probs]); newline=false)
 end

From e95e5de5dd0dcb74cff8343098b3763f66144e45 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Thu, 12 Jan 2023 13:56:54 +0530
Subject: [PATCH 30/80] Change docstrings to account for new `show`

---
 src/Survey.jl       |  2 +-
 src/SurveyDesign.jl | 38 ++++++++++++++++++--------------------
 src/bootstrap.jl    | 23 ++++++++++-------------
 src/boxplot.jl      |  2 +-
 src/by.jl           |  2 +-
 src/hist.jl         |  2 +-
 src/jackknife.jl    |  2 +-
 src/mean.jl         | 12 ++++++------
 src/ratio.jl        | 11 ++++-------
 src/total.jl        | 12 ++++++------
 10 files changed, 49 insertions(+), 57 deletions(-)

diff --git a/src/Survey.jl b/src/Survey.jl
index dd71a092..f25e33a7 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -38,4 +38,4 @@ export bootweights
 export jkknife
 export ratio
 
-end
\ No newline at end of file
+end
diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 69657cd0..0cc1e6b0 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -28,19 +28,18 @@ individuals in one cluster are sampled. The clusters are considered disjoint and
 - `popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
 
 ```jldoctest
-julia> apiclus1 = load_data("apiclus1");
+julia> apistrat = load_data("apistrat");
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
+julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
 SurveyDesign:
-data: 183x46 DataFrame
-cluster: dnum
-design.data[!,design.cluster]: 637, 637, 637, ..., 448
-popsize: popsize
-design.data[!,design.popsize]: 6190.0, 6190.0, 6190.0, ..., 6190.0
-sampsize: sampsize
-design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 0.0295, 0.0295, 0.0295, ..., 0.0295
-design.data[!,:allprobs]: 0.0295, 0.0295, 0.0295, ..., 0.0295
+data: 200×46 DataFrame
+strata: stype
+    [E, E, E  …  H]
+cluster: none
+popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+sampsize: [200, 200, 200  …  200]
+weights: [44.2, 44.2, 44.2  …  15.1]
+probs: [0.0226, 0.0226, 0.0226  …  0.0662]
 ```
 """
 struct SurveyDesign <: AbstractSurveyDesign
@@ -107,15 +106,14 @@ julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
 
 julia> bootstrat = bootweights(strat; replicates=1000)
 ReplicateDesign:
-data: 200x1046 DataFrame
-cluster: false_cluster
-design.data[!,design.cluster]: 1, 2, 3, ..., 200
-popsize: popsize
-design.data[!,design.popsize]: 6190.0, 6190.0, 6190.0, ..., 6190.0
-sampsize: sampsize
-design.data[!,design.sampsize]: 200, 200, 200, ..., 200
-design.data[!,:probs]: 0.0226, 0.0226, 0.0226, ..., 0.0662
-design.data[!,:allprobs]: 0.0226, 0.0226, 0.0226, ..., 0.0662
+data: 200×1046 DataFrame
+strata: stype
+    [E, E, E  …  H]
+cluster: none
+popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+sampsize: [200, 200, 200  …  200]
+weights: [44.2, 44.2, 44.2  …  15.1]
+probs: [0.0226, 0.0226, 0.0226  …  0.0662]
 replicates: 1000
 ```
 """
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index b4e226a8..83defc97 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -4,21 +4,18 @@ julia> using Random
 
 julia> apiclus1 = load_data("apiclus1");
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum);
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum);
 
-julia> rng = MersenneTwister(111);
-
-julia> bootweights(dclus1; replicates=1000, rng)
+julia> bootweights(clus_one_stage; replicates=1000, rng=MersenneTwister(111)) # choose a seed for deterministic results
 ReplicateDesign:
-data: 183x1046 DataFrame
+data: 183×1046 DataFrame
+strata: none
 cluster: dnum
-design.data[!,design.cluster]: 637, 637, 637, ..., 448
-popsize: popsize
-design.data[!,design.popsize]: 183, 183, 183, ..., 183
-sampsize: sampsize
-design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 1.0, 1.0, 1.0, ..., 1.0
-design.data[!,:allprobs]: 1.0, 1.0, 1.0, ..., 1.0
+    [637, 637, 637  …  448]
+popsize: [183, 183, 183  …  183]
+sampsize: [15, 15, 15  …  15]
+weights: [1, 1, 1  …  1]
+probs: [1.0, 1.0, 1.0  …  1.0]
 replicates: 1000
 ```
 """
@@ -51,4 +48,4 @@ function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(
         df[!, "replicate_" * string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
-end
\ No newline at end of file
+end
diff --git a/src/boxplot.jl b/src/boxplot.jl
index 8790f116..8ee3dcc4 100644
--- a/src/boxplot.jl
+++ b/src/boxplot.jl
@@ -10,7 +10,7 @@ The keyword arguments are all the arguments that can be passed to `mapping` in
 
 ```@example boxplot
 apisrs = load_data("apisrs");
-srs = srs = SurveyDesign(apisrs; weights=:pw);
+srs = SurveyDesign(apisrs; weights=:pw);
 bp = boxplot(srs, :stype, :enroll; weights = :pw)
 save("boxplot.png", bp); nothing # hide
 ```
diff --git a/src/by.jl b/src/by.jl
index be26d5a3..a4de2f55 100644
--- a/src/by.jl
+++ b/src/by.jl
@@ -14,4 +14,4 @@ function bydomain(x::Symbol, domain::Symbol, design::ReplicateDesign, func::Func
     replace!(ses, NaN => 0)
     X.SE = ses
     return X
-end
\ No newline at end of file
+end
diff --git a/src/hist.jl b/src/hist.jl
index 90d42d1b..17b54098 100644
--- a/src/hist.jl
+++ b/src/hist.jl
@@ -61,7 +61,7 @@ For the complete argument list see [Makie.hist](https://makie.juliaplots.org/sta
 
 ```@example histogram
 apisrs = load_data("apisrs");
-srs = SimpleRandomSample(apisrs;popsize=:fpc);
+srs = SurveyDesign(apisrs; weights=:pw);
 h = hist(srs, :enroll)
 save("hist.png", h); nothing # hide
 ```
diff --git a/src/jackknife.jl b/src/jackknife.jl
index 794ef10b..55880df9 100644
--- a/src/jackknife.jl
+++ b/src/jackknife.jl
@@ -13,4 +13,4 @@ function jkknife(variable:: Symbol, design::SurveyDesign ,func:: Function;  para
     end
     var = c*(nh-1)/nh
     return DataFrame(Statistic = statistic, SE = sqrt(var))
-end
\ No newline at end of file
+end
diff --git a/src/mean.jl b/src/mean.jl
index 0ef5bb37..c1d80259 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -6,16 +6,16 @@ Compute the estimated mean of one or more variables within a survey design.
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> mean(:api00, clus1)
+julia> mean(:api00, clus_one_stage)
 1×2 DataFrame
  Row │ mean     SE
      │ Float64  Float64
 ─────┼──────────────────
    1 │ 644.169  23.2919
 
-julia> mean([:api00, :enroll], clus1)
+julia> mean([:api00, :enroll], clus_one_stage)
 2×3 DataFrame
  Row │ names   mean     SE
      │ String  Float64  Float64
@@ -45,9 +45,9 @@ Compute the estimated mean within a domain.
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> mean(:api00, :cname, clus1)
+julia> mean(:api00, :cname, clus_one_stage)
 11×3 DataFrame
  Row │ cname        mean     SE
      │ String15     Float64  Any
@@ -70,4 +70,4 @@ function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
     df = bydomain(x, domain, design, weighted_mean)
     rename!(df, :statistic => :mean)
     return df
-end
\ No newline at end of file
+end
diff --git a/src/ratio.jl b/src/ratio.jl
index 67e51668..1623eb3a 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -1,17 +1,14 @@
 """
     ratio(numerator, denominator, design)
+
 Estimate the ratio of the columns specified in numerator and denominator
 
 ```jldoctest
-julia> using Survey;
-
 julia> apiclus1 = load_data("apiclus1");
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
-
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
 
-julia> ratio(:api00, :enroll, dclus1)
+julia> ratio(:api00, :enroll, clus_one_stage)
 1×2 DataFrame
  Row │ Statistic  SE       
      │ Float64    Float64  
@@ -35,4 +32,4 @@ function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesig
     end
     var = c*(nh-1)/nh
     return DataFrame(Statistic = statistic, SE = sqrt(var))
-end
\ No newline at end of file
+end
diff --git a/src/total.jl b/src/total.jl
index e5fbbdcb..0c5001e5 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -6,16 +6,16 @@ Compute the estimated population total for one or more variables within a survey
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> total(:api00, clus1)
+julia> total(:api00, clus_one_stage)
 1×2 DataFrame
  Row │ total      SE
      │ Float64    Float64
 ─────┼──────────────────────
    1 │ 3.98999e6  9.22175e5
 
-julia> total([:api00, :enroll], clus1)
+julia> total([:api00, :enroll], clus_one_stage)
 2×3 DataFrame
  Row │ names   total      SE
      │ String  Float64    Float64
@@ -45,9 +45,9 @@ Compute the estimated population total within a domain.
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> total(:api00, :cname, clus1)
+julia> total(:api00, :cname, clus_one_stage)
 11×3 DataFrame
  Row │ cname        total           SE
      │ String15     Float64         Any
@@ -68,4 +68,4 @@ julia> total(:api00, :cname, clus1)
 function total(x::Symbol, domain::Symbol, design::ReplicateDesign)
     df = bydomain(x, domain, design, wsum)
     rename!(df, :statistic => :total)
-end
\ No newline at end of file
+end

From 21c6bbd2c4af720eec7ded52e21d30c9972370c5 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Thu, 12 Jan 2023 14:50:42 +0530
Subject: [PATCH 31/80] Add tests for `show`

---
 test/runtests.jl |   3 +-
 test/show.jl     | 111 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 test/show.jl

diff --git a/test/runtests.jl b/test/runtests.jl
index e8f18a3a..6bb9c738 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -16,4 +16,5 @@ include("mean.jl")
 include("plot.jl")
 include("hist.jl")
 include("boxplot.jl")
-include("ratio.jl")
\ No newline at end of file
+include("ratio.jl")
+include("show.jl")
diff --git a/test/show.jl b/test/show.jl
new file mode 100644
index 00000000..f81ed2a3
--- /dev/null
+++ b/test/show.jl
@@ -0,0 +1,111 @@
+@testset "No strata, no clusters" begin
+    io = IOBuffer()
+
+    apisrs = load_data("apisrs")
+    srs = SurveyDesign(apisrs; weights=:pw)
+    refstr = """
+    SurveyDesign:
+    data: 200×47 DataFrame
+    strata: none
+    cluster: none
+    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    sampsize: [200, 200, 200  …  200]
+    weights: [31.0, 31.0, 31.0  …  31.0]
+    probs: [0.0323, 0.0323, 0.0323  …  0.0323]"""
+
+    show(io, MIME("text/plain"), srs)
+    str = String(take!(io))
+    @test str == refstr
+
+    bsrs = srs |> bootweights
+    refstrb = """
+    ReplicateDesign:
+    data: 200×4047 DataFrame
+    strata: none
+    cluster: none
+    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    sampsize: [200, 200, 200  …  200]
+    weights: [31.0, 31.0, 31.0  …  31.0]
+    probs: [0.0323, 0.0323, 0.0323  …  0.0323]
+    replicates: 4000"""
+
+    show(io, MIME("text/plain"), bsrs)
+    strb = String(take!(io))
+    @test strb == refstrb
+end
+
+@testset "With strata, no clusters" begin
+    io = IOBuffer()
+
+    apistrat = load_data("apistrat")
+    strat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
+    refstr = """
+    SurveyDesign:
+    data: 200×46 DataFrame
+    strata: stype
+        [E, E, E  …  H]
+    cluster: none
+    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    sampsize: [200, 200, 200  …  200]
+    weights: [44.2, 44.2, 44.2  …  15.1]
+    probs: [0.0226, 0.0226, 0.0226  …  0.0662]"""
+
+    show(io, MIME("text/plain"), strat)
+    str = String(take!(io))
+    @test str == refstr
+
+    stratb = strat |> bootweights
+    refstrb = """
+    ReplicateDesign:
+    data: 200×4046 DataFrame
+    strata: stype
+        [E, E, E  …  H]
+    cluster: none
+    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    sampsize: [200, 200, 200  …  200]
+    weights: [44.2, 44.2, 44.2  …  15.1]
+    probs: [0.0226, 0.0226, 0.0226  …  0.0662]
+    replicates: 4000"""
+
+    show(io, MIME("text/plain"), stratb)
+    strb = String(take!(io))
+    @test strb == refstrb
+end
+
+@testset "No strata, with clusters" begin
+    io = IOBuffer()
+
+    apiclus1 = load_data("apiclus1")
+    clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
+    refstr = """
+    SurveyDesign:
+    data: 183×46 DataFrame
+    strata: none
+    cluster: dnum
+        [637, 637, 637  …  448]
+    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    sampsize: [15, 15, 15  …  15]
+    weights: [33.8, 33.8, 33.8  …  33.8]
+    probs: [0.0295, 0.0295, 0.0295  …  0.0295]"""
+
+    show(io, MIME("text/plain"), clus_one_stage)
+    str = String(take!(io))
+    @test str == refstr
+
+    clus_one_stageb = clus_one_stage |> bootweights
+    refstrb = """
+    ReplicateDesign:
+    data: 183×4046 DataFrame
+    strata: none
+    cluster: dnum
+        [637, 637, 637  …  448]
+    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    sampsize: [15, 15, 15  …  15]
+    weights: [33.8, 33.8, 33.8  …  33.8]
+    probs: [0.0295, 0.0295, 0.0295  …  0.0295]
+    replicates: 4000"""
+
+    show(io, MIME("text/plain"), clus_one_stageb)
+    strb = String(take!(io))
+    @test strb == refstrb
+end

From d3d0a445a4e39415b2bcdfd78ca6e1e1f0d960b2 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 13 Jan 2023 14:08:41 +0530
Subject: [PATCH 32/80] Update README according to new `show`

---
 README.md | 218 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 139 insertions(+), 79 deletions(-)

diff --git a/README.md b/README.md
index 65f70cd2..3f1542a3 100644
--- a/README.md
+++ b/README.md
@@ -6,111 +6,171 @@
 [![codecov](https://codecov.io/gh/xKDR/Survey.jl/branch/main/graph/badge.svg?token=4PFSF47BT2)](https://codecov.io/gh/xKDR/Survey.jl)
 [![Milestones](https://img.shields.io/badge/-milestones-brightgreen)](https://github.com/xKDR/Survey.jl/milestones)
 
+This package is used to study complex survey data. It aims to be a fast alternative
+to the [Survey package in R](https://cran.r-project.org/web/packages/survey/index.html)
+developed by [Professor Thomas Lumley](https://www.stat.auckland.ac.nz/people/tlum005).
 
-This package is used to study complex survey data. It aims to be a fast alternative to the [Survey package in R](https://cran.r-project.org/web/packages/survey/index.html) developed by [Professor Thomas Lumley](https://www.stat.auckland.ac.nz/people/tlum005).
+All types of survey design are supported by this package.
 
-This package currently supports simple random sample and stratified sample. In future releases, it will support multistage sampling as well. 
+> **_NOTE:_**  For multistage sampling a single-stage approximation is used. For
+more information see the [TODO](https://xkdr.github.io/Survey.jl/dev/) section of
+the documentation.
 
-## Documentation
-See [Documentation](https://xkdr.github.io/Survey.jl/dev/) to learn how to use the package 
-
-## How to install
+## Installation
 ```julia
 ]  add "https://github.com/xKDR/Survey.jl.git"
 ```
+
 ## Basic usage
 
-### Simple Random Sample
+The `SurveyDesign` constructor can take data corresponding to any type of design.
+Depending on the keyword arguments passed, the data is processed in order to obtain
+correct results for the given design.
 
-In the following example, we will load a simple random sample of the Academic Performance Index dataset for Californian schools and do basic analysis. 
-```julia
-using Survey
+The following examples show how to create and manipulate different survey designs
+using the [Academic Performance Index dataset for Californian schools](https://r-survey.r-forge.r-project.org/survey/html/api.html).
+
+### Constructing a survey design
+
+A survey design can be created by calling the constructor with some keywords,
+depending on the survey type. Let's create a simple random sample, a stratified
+sample, a single-stage and a two-stage cluster sample.
 
-srs = load_data("apisrs")
+```julia
+julia> apisrs = load_data("apisrs");
+
+julia> srs = SurveyDesign(apisrs; weights=:pw)
+SurveyDesign:
+data: 200×47 DataFrame
+strata: none
+cluster: none
+popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+sampsize: [200, 200, 200  …  200]
+weights: [31.0, 31.0, 31.0  …  31.0]
+probs: [0.0323, 0.0323, 0.0323  …  0.0323]
+
+julia> apistrat = load_data("apistrat");
+
+julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
+SurveyDesign:
+data: 200×46 DataFrame
+strata: stype
+    [E, E, E  …  H]
+cluster: none
+popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+sampsize: [200, 200, 200  …  200]
+weights: [44.2, 44.2, 44.2  …  15.1]
+probs: [0.0226, 0.0226, 0.0226  …  0.0662]
+
+julia> apiclus1 = load_data("apiclus1");
+
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
+SurveyDesign:
+data: 183×46 DataFrame
+strata: none
+cluster: dnum
+    [637, 637, 637  …  448]
+popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+sampsize: [15, 15, 15  …  15]
+weights: [33.8, 33.8, 33.8  …  33.8]
+probs: [0.0295, 0.0295, 0.0295  …  0.0295]
+
+julia> apiclus2 = load_data("apiclus2");
+
+julia> clus_two_stage = SurveyDesign(apiclus2; clusters=[:dnum, :snum], weights=:pw)
+SurveyDesign:
+data: 126×47 DataFrame
+strata: none
+cluster: dnum
+    [15, 63, 83  …  795]
+popsize: [5130.0, 5130.0, 5130.0  …  5130.0]
+sampsize: [40, 40, 40  …  40]
+weights: [18.9, 18.9, 18.9  …  18.9]
+probs: [0.0528, 0.0528, 0.0528  …  0.0528]
+```
 
-dsrs = SimpleRandomSample(srs; weights = :pw)
+Using these designs we can compute estimates of statistics such as mean and
+population total. The designs must first be resampled using
+[bootstrapping](https://en.wikipedia.org/wiki/Bootstrapping_(statistics)) in order
+to compute the standard errors.
 
-mean(:api00, dsrs)
+```julia
+julia> bootsrs = bootweights(srs; replicates=1000)
+ReplicateDesign:
+data: 200×1047 DataFrame
+strata: none
+cluster: none
+popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+sampsize: [200, 200, 200  …  200]
+weights: [31.0, 31.0, 31.0  …  31.0]
+probs: [0.0323, 0.0323, 0.0323  …  0.0323]
+replicates: 1000
+
+julia> mean(:api00, bootsrs)
 1×2 DataFrame
- Row │ mean     SE      
-     │ Float64  Float64 
+ Row │ mean     SE
+     │ Float64  Float64
 ─────┼──────────────────
-   1 │ 656.585  9.24972
+   1 │ 656.585   9.5409
 
-total(:enroll, dsrs)
+julia> total(:enroll, bootsrs)
 1×2 DataFrame
- Row │ total      SE       
-     │ Float64    Float64  
-─────┼─────────────────────
-   1 │ 3.62107e6  1.6952e5  
-
-mean(:api00, :cname, dsrs)
-38×3 DataFrame
- Row │ cname            mean     SE       
-     │ String15         Float64  Float64  
-─────┼────────────────────────────────────
-   1 │ Kern             573.6     42.8026
-   2 │ Los Angeles      658.156   21.0728
-   3 │ Orange           749.333   27.0613
-  ⋮  │        ⋮            ⋮        ⋮
-  36 │ Napa             727.0     46.722
-  37 │ Lake             804.0    NaN
-  38 │ Merced           595.0    NaN
-
-quantile(:enroll,dsrs,[0.1,0.2,0.5,0.75,0.95])
-5×2 DataFrame
- Row │ probability  quantile 
-     │ Float64      Float64  
-─────┼───────────────────────
-   1 │        0.1      245.5
-   2 │        0.2      317.6
-   3 │        0.5      453.0
-   4 │        0.75     668.5
-   5 │        0.95    1473.1
+ Row │ total      SE
+     │ Float64    Float64
+─────┼──────────────────────
+   1 │ 3.62107e6  1.72846e5
 ```
 
-### Stratified Sample
-
-In the following example, we will load a stratified sample of the Academic Performance Index dataset for Californian schools and do basic analysis. 
+Now we know the estimated mean academic performance index from the year 2000 and the
+estimated total number of students enrolled in Californian schools. We can also
+calculate statistics for multiple variables in one go...
 
 ```julia
-using Survey
+julia> mean([:api99, :api00], bootsrs)
+2×3 DataFrame
+ Row │ names   mean     SE
+     │ String  Float64  Float64
+─────┼──────────────────────────
+   1 │ api99   624.685  9.84669
+   2 │ api00   656.585  9.5409
+```
+
+... or we can calculate domain estimates:
 
-strat = load_data("apistrat")
+```julia
+julia> total(:enroll, :cname, bootsrs)
+38×3 DataFrame
+ Row │ cname            total           SE
+     │ String15         Float64         Any
+─────┼────────────────────────────────────────────
+   1 │ Kern                  1.95823e5  74731.2
+   2 │ Los Angeles      867129.0        1.36622e5
+   3 │ Orange                1.68786e5  63858.0
+   4 │ San Luis Obispo    6720.49       6790.49
+  ⋮  │        ⋮               ⋮             ⋮
+  35 │ Calaveras         12976.4        13241.6
+  36 │ Napa              39239.0        30181.9
+  37 │ Lake               6410.79       6986.29
+  38 │ Merced            15392.1        15202.2
+                                   30 rows omitted
+```
 
-dstrat = StratifiedSample(strat, :stype; weights = :pw, popsize = :fpc)
+This gives us the total number of enrolled students in each county.
 
-mean(:api00, dstrat)
-1×2 DataFrame
- Row │ mean     SE      
-     │ Float64  Float64 
-─────┼──────────────────
-   1 │ 662.287  9.40894
+All functionalities are supported by each design type. For a more complete guide,
+see the [Tutorial](https://xkdr.github.io/Survey.jl/dev/#Basic-demo) section in
+the documentation.
 
-total(:api00, dstrat)
-1×2 DataFrame
- Row │ total      SE      
-     │ Float64    Float64 
-─────┼────────────────────
-   1 │ 4.10221e6  58279.0
-
-mean(:api00, :cname, dstrat)
-40×3 DataFrame
- Row │ cname           mean     SE           
-     │ String15        Float64  Float64      
-─────┼───────────────────────────────────────
-   1 │ Los Angeles     633.511  21.3912
-   2 │ Ventura         707.172  31.6856
-   3 │ Kern            678.235  53.1337
-  ⋮  │       ⋮            ⋮          ⋮
-  39 │ Mendocino       632.018   1.04942
-  40 │ Butte           627.0     0.0
-```
+## Future goals
 
-## Strategic goals
-We want to implement all the features provided by the [Survey package in R](https://cran.r-project.org/web/packages/survey/index.html)
+We want to implement all the features provided by the
+[Survey package in R](https://cran.r-project.org/web/packages/survey/index.html)
+in a Julia-native way. The main goal is to have a complete package that provides
+a large range of functionality and takes efficiency into consideration, such that
+large surveys can be analysed fast.
 
-The [milestones](https://github.com/xKDR/Survey.jl/milestones) sections of the repository contains a list of features that contributors can implement in the short-term.
+The [milestones](https://github.com/xKDR/Survey.jl/milestones) section of the repository
+contains a list of features that contributors can implement in the short term.
 
 ## Support
 

From dbc6ba771cbbffb038291b560a04adeb9e6e87de Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 13 Jan 2023 14:12:16 +0530
Subject: [PATCH 33/80] Change Future goals to Goals

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3f1542a3..df4f5696 100644
--- a/README.md
+++ b/README.md
@@ -161,7 +161,7 @@ All functionalities are supported by each design type. For a more complete guide
 see the [Tutorial](https://xkdr.github.io/Survey.jl/dev/#Basic-demo) section in
 the documentation.
 
-## Future goals
+## Goals
 
 We want to implement all the features provided by the
 [Survey package in R](https://cran.r-project.org/web/packages/survey/index.html)

From bd59a671dcf2154e7378cf0bf5bc37db33dd8ef0 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Fri, 13 Jan 2023 20:28:09 +0530
Subject: [PATCH 34/80] add ht.jl to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index e4441b23..494288f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@
 .gitignore
 .DS_Store
 *.json
+src/ht.jl
\ No newline at end of file

From 3f4c07ff30d21f9dd204c803ac820c88484e5540 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sat, 14 Jan 2023 01:42:05 +0530
Subject: [PATCH 35/80] digits=4 in show, combine not popsize in weight, strat
 tests

---
 src/SurveyDesign.jl | 48 +++++++++++++++++++++++++--------------------
 src/show.jl         |  6 +++---
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 596d925a..c2dde058 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -25,21 +25,22 @@ individuals in one cluster are sampled. The clusters are considered disjoint and
 - `strata::Union{Nothing, Symbol}=nothing`: the stratification variable.
 - `clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable.
 - `weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
-- `popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
+- `popsize::Union{Nothing, Symbol}=nothing`: the (expected) survey population size.
 
 ```jldoctest
-julia> apistrat = load_data("apistrat");
+julia> apiclus1 = load_data("apiclus1");
 
-julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
+julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, strata=:stype, weights=:pw)
 SurveyDesign:
-data: 200×46 DataFrame
+data: 183×43 DataFrame
 strata: stype
-    [E, E, E  …  H]
-cluster: none
-popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
-sampsize: [200, 200, 200  …  200]
-weights: [44.2, 44.2, 44.2  …  15.1]
-probs: [0.0226, 0.0226, 0.0226  …  0.0662]
+    [H, E, E  …  E]
+cluster: dnum
+    [637, 637, 637  …  448]
+popsize: [507.7049, 507.7049, 507.7049  …  507.7049]
+sampsize: [15, 15, 15  …  15]
+weights: [33.847, 33.847, 33.847  …  33.847]
+allprobs: [0.0295, 0.0295, 0.0295  …  0.0295]
 ```
 """
 struct SurveyDesign <: AbstractSurveyDesign
@@ -74,28 +75,33 @@ struct SurveyDesign <: AbstractSurveyDesign
         if typeof(clusters) <: Symbol
             cluster = clusters
         end
-        # For one-stage sample only one sampsize vector
+        # For single-stage approximation only one "effective" sampsize vector
         sampsize_labels = :sampsize
-        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])), (nrow(data),))
-        if !(typeof(popsize) <: Nothing)
+        if isa(strata,Symbol) && isnothing(clusters) # If stratified sample then sampsize is inside strata
+            data[!, sampsize_labels] = transform(groupby(data, strata), nrow => :counts).counts
+        else
+            data[!, sampsize_labels] = fill(length(unique(data[!, cluster])), (nrow(data),))
+        end
+        if isa(popsize, Symbol)
             weights_labels = :weights
             data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
-        elseif typeof(weights) <: Symbol
+        elseif isa(weights, Symbol)
             if !(typeof(data[!, weights]) <: Vector{<:Real})
-                error(string("given weights column ", weights , " is not of numeric type"))
+                throw(ArgumentError(string("given weights column ", weights , " is not of numeric type")))
+            else
+                weights_labels = weights
+                # derive popsize from given `weights`
+                popsize = :popsize
+                data[!, popsize] = data[!, sampsize_labels] .* data[!, weights_labels]
             end
-            weights_labels = weights
         else
+            # neither popsize nor weights given
             weights_labels = :weights
             data[!, weights_labels] = repeat([1], nrow(data))
         end
         allprobs_labels = :allprobs
         data[!, allprobs_labels] = 1 ./ data[!, weights_labels] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
-        pps = false # for now no explicit pps support
-        if !(typeof(popsize) <: Symbol)
-            popsize = :popsize
-            data[!,popsize] = repeat([sum(data[!, weights_labels])], nrow(data))
-        end
+        pps = false # for now no explicit pps supported faster functions, but they can be added
         new(data, cluster, popsize, sampsize_labels, strata, weights_labels, allprobs_labels, pps)
     end
 end
diff --git a/src/show.jl b/src/show.jl
index a72c3250..730d506b 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -5,7 +5,7 @@ Helper function that transforms a given `Number` or `Vector` into a short-form s
 """
 function makeshort(x)
     if isa(x[1], Float64)
-        x = round.(x, sigdigits=3)
+        x = round.(x, digits=4) # Rounded to 4 digits after the decimal place
     end
     # print short vectors or single values as they are, compress otherwise
     if length(x) > 1
@@ -56,6 +56,6 @@ function surveyshow(io::IO, design::AbstractSurveyDesign)
     printinfo(io, "popsize", makeshort(design.data[!, design.popsize]))
     printinfo(io, "sampsize", makeshort(design.data[!, design.sampsize]))
     # weights and probs info
-    printinfo(io, "weights", makeshort(design.data[!, :weights]))
-    printinfo(io, "probs", makeshort(design.data[!, :probs]); newline=false)
+    printinfo(io, "weights", makeshort(design.data[!, design.weights]))
+    printinfo(io, "allprobs", makeshort(design.data[!, design.allprobs]); newline=false)
 end

From c00d5937d298a90319de903e4eb8c8c9985435bd Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sat, 14 Jan 2023 01:42:22 +0530
Subject: [PATCH 36/80] stratified tests

---
 test/SurveyDesign.jl | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index be515926..baa07b45 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -21,7 +21,7 @@
     ##############################
     ### Weights as non-numeric error
     apisrs = copy(apisrs_original)
-    @test_throws ErrorException SurveyDesign(apisrs, weights=:stype)
+    @test_throws ArgumentError SurveyDesign(apisrs, weights=:stype)
 end
 
 @testset "SurveyDesign_strat" begin
@@ -34,13 +34,25 @@ end
     ### weights as Symbol
     apistrat = copy(apistrat_original)
     strat_wt = SurveyDesign(apistrat, strata=:stype, weights=:pw)
+    @test strat_wt.data[!,strat_wt.weights][1] ≈ 44.2100 atol = 1e-4
+    @test strat_wt.data[!,strat_wt.weights][200] ≈ 15.1000 atol = 1e-4
     @test strat_wt.data[!,strat_wt.weights] == 1 ./ strat_wt.data[!,strat_wt.allprobs]
-    ### popsize as Symbol
+    ### popsize as Symbol (should be same as above)
     apistrat = copy(apistrat_original)
     strat_pop = SurveyDesign(apistrat, strata=:stype, popsize=:fpc)
+    @test strat_pop.data[!,strat_pop.weights][1] ≈ 44.2100 atol = 1e-4
+    @test strat_pop.data[!,strat_pop.weights][200] ≈ 15.1000 atol = 1e-4
     @test strat_pop.data[!,strat_pop.weights] == 1 ./ strat_pop.data[!,strat_pop.allprobs]
+    ### popsize and weights as Symbol (should be same as above two)
+    apistrat = copy(apistrat_original)
+    dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw, popsize=:fpc)
+    @test dstrat.data[!,dstrat.weights][1] ≈ 44.2100 atol = 1e-4
+    @test dstrat.data[!,dstrat.weights][200] ≈ 15.1000 atol = 1e-4
+    @test dstrat.data[!,dstrat.weights] == 1 ./ dstrat.data[!,dstrat.allprobs]
     ##############################
-    # @test strat_pop.data[!,strat_pop.weights] == strat_wt.data[!,strat_wt.weights]
+    # Check all three ways get equivalent weights
+    @test strat_pop.data[!,strat_pop.weights] ≈ strat_wt.data[!,strat_wt.weights] rtol = 1e-4
+    @test strat_wt.data[!,strat_wt.weights] ≈ strat_wt.data[!,strat_wt.weights] rtol = 1e-4
 end
 
 @testset "SurveyDesign_multistage" begin
@@ -51,10 +63,8 @@ end
     # one-stage cluster sample with popsize
     apiclus1 = copy(apiclus1_original)
     dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize =:fpc)
-    @test dclus1.data[!, :weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
-    @test dclus1.data[!,dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
-    @test dclus1.data[!,:allprobs] ≈ dclus1.data[!,:probs] atol = 1e-4
-    
+    @test dclus1.data[!, dclus1.weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
+    @test dclus1.data[!, dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
     ##############################
     # Load API datasets
     nhanes = load_data("nhanes")
@@ -68,9 +78,9 @@ end
     ##############################
     # NHANES
     nhanes = copy(nhanes_original)
-    dnhanes = SingleStageSurveyDesign(nhanes; cluster = :SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR)
+    dnhanes = SurveyDesign(nhanes; clusters = :SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR)
     ##############################
     # YRBS
     yrbs = copy(yrbs_original)
-    dyrbs = SingleStageSurveyDesign(yrbs; cluster = :psu, strata=:stratum, weights=:weight)
+    dyrbs = SurveyDesign(yrbs; clusters = :psu, strata=:stratum, weights=:weight)
 end

From ece7181e9417c595c06639f585a0df685282b550 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 15 Jan 2023 11:44:04 +0530
Subject: [PATCH 37/80] Add apiclus2 and sampsize testing

---
 src/SurveyDesign.jl  |  2 +-
 test/SurveyDesign.jl | 67 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index c2dde058..ff67f682 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -89,8 +89,8 @@ struct SurveyDesign <: AbstractSurveyDesign
             if !(typeof(data[!, weights]) <: Vector{<:Real})
                 throw(ArgumentError(string("given weights column ", weights , " is not of numeric type")))
             else
-                weights_labels = weights
                 # derive popsize from given `weights`
+                weights_labels = weights
                 popsize = :popsize
                 data[!, popsize] = data[!, sampsize_labels] .* data[!, weights_labels]
             end
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index baa07b45..de74f2f4 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -11,11 +11,15 @@
     srs_weights = SurveyDesign(apisrs, weights=:pw)
     @test srs_weights.data[!,srs_weights.weights][1] ≈ 30.97 atol = 1e-4
     @test srs_weights.data[!,srs_weights.weights] == 1 ./ srs_weights.data[!,srs_weights.allprobs]
+    @test srs_weights.data[!,srs_weights.allprobs] ≈ srs_weights.data[!, :derived_probs] atol = 1e-4
+    @test srs_weights.data[!,srs_weights.sampsize] ≈ srs_weights.data[!, :derived_sampsize] atol = 1e-4
     ### popsize as Symbol
     apisrs = copy(apisrs_original)
     srs_pop = SurveyDesign(apisrs, popsize=:fpc)
     @test srs_pop.data[!,srs_pop.weights][1] ≈ 30.97 atol = 1e-4
     @test srs_pop.data[!,srs_pop.weights] == 1 ./ srs_pop.data[!,srs_pop.allprobs]
+    @test srs_pop.data[!,srs_pop.allprobs] ≈ srs_pop.data[!, :derived_probs] atol = 1e-4
+    @test srs_pop.data[!,srs_pop.sampsize] ≈ srs_pop.data[!, :derived_sampsize] atol = 1e-4
     ### Both ways should achieve same weights and allprobs!
     @test srs_pop.data[!,srs_pop.weights] == srs_weights.data[!,srs_weights.weights]
     ##############################
@@ -37,38 +41,91 @@ end
     @test strat_wt.data[!,strat_wt.weights][1] ≈ 44.2100 atol = 1e-4
     @test strat_wt.data[!,strat_wt.weights][200] ≈ 15.1000 atol = 1e-4
     @test strat_wt.data[!,strat_wt.weights] == 1 ./ strat_wt.data[!,strat_wt.allprobs]
-    ### popsize as Symbol (should be same as above)
+    @test strat_wt.data[!,strat_wt.allprobs] ≈ strat_wt.data[!, :derived_probs] atol = 1e-4
+    @test strat_wt.data[!,strat_wt.sampsize] ≈ strat_wt.data[!, :derived_sampsize] atol = 1e-4
+    ### popsize as Symbol (should be same as above (for now))
     apistrat = copy(apistrat_original)
     strat_pop = SurveyDesign(apistrat, strata=:stype, popsize=:fpc)
     @test strat_pop.data[!,strat_pop.weights][1] ≈ 44.2100 atol = 1e-4
     @test strat_pop.data[!,strat_pop.weights][200] ≈ 15.1000 atol = 1e-4
     @test strat_pop.data[!,strat_pop.weights] == 1 ./ strat_pop.data[!,strat_pop.allprobs]
+    @test strat_pop.data[!,strat_pop.allprobs] ≈ strat_pop.data[!, :derived_probs] atol = 1e-4
+    @test strat_pop.data[!,strat_pop.sampsize] ≈ strat_pop.data[!, :derived_sampsize] atol = 1e-4
     ### popsize and weights as Symbol (should be same as above two)
     apistrat = copy(apistrat_original)
     dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw, popsize=:fpc)
     @test dstrat.data[!,dstrat.weights][1] ≈ 44.2100 atol = 1e-4
     @test dstrat.data[!,dstrat.weights][200] ≈ 15.1000 atol = 1e-4
     @test dstrat.data[!,dstrat.weights] == 1 ./ dstrat.data[!,dstrat.allprobs]
+    @test dstrat.data[!,dstrat.allprobs] ≈ dstrat.data[!, :derived_probs] atol = 1e-4
+    @test dstrat.data[!,dstrat.sampsize] ≈ dstrat.data[!, :derived_sampsize] atol = 1e-4
     ##############################
     # Check all three ways get equivalent weights
     @test strat_pop.data[!,strat_pop.weights] ≈ strat_wt.data[!,strat_wt.weights] rtol = 1e-4
     @test strat_wt.data[!,strat_wt.weights] ≈ strat_wt.data[!,strat_wt.weights] rtol = 1e-4
 end
 
-@testset "SurveyDesign_multistage" begin
+@testset "SurveyDesign_apiclus1" begin
     # Load API datasets
     apiclus1_original = load_data("apiclus1")
     apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
+    apiclus1_original[!, :derived_probs] = 1 ./ apiclus1_original.pw
     ##############################
     # one-stage cluster sample with popsize
     apiclus1 = copy(apiclus1_original)
     dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize =:fpc)
     @test dclus1.data[!, dclus1.weights] ≈ fill(50.4667,size(apiclus1,1)) atol = 1e-3
     @test dclus1.data[!, dclus1.sampsize] ≈ fill(15,size(apiclus1,1))
-    ##############################
+    @test dclus1.data[!, dclus1.allprobs] ≈ dclus1.data[!, :derived_probs] atol = 1e-4
+end
+
+@testset "SurveyDesign_apiclus2" begin
     # Load API datasets
-    nhanes = load_data("nhanes")
-    nhanes_design = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR)
+    apiclus2_original = load_data("apiclus2")
+    apiclus2_original[!, :derived_probs] = 1 ./ apiclus2_original.pw
+    ##############################
+    calculated_probs_R = [0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.024018254, 0.024018254, 0.024018254, 0.024018254,
+    0.024018254, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.007338911, 0.007338911, 0.007338911,
+    0.007338911, 0.007338911, 0.052840159, 0.009435743, 0.009435743,
+    0.009435743, 0.009435743, 0.009435743, 0.037742970, 0.037742970,
+    0.037742970, 0.037742970, 0.037742970, 0.003669455, 0.003669455,
+    0.003669455, 0.003669455, 0.003669455, 0.018871485, 0.018871485,
+    0.018871485, 0.018871485, 0.018871485, 0.037742970, 0.037742970,
+    0.037742970, 0.037742970, 0.037742970, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.052840159, 0.052840159, 0.052840159,
+    0.052840159, 0.052840159, 0.029355644, 0.029355644, 0.029355644,
+    0.029355644, 0.029355644, 0.052840159, 0.052840159, 0.052840159,
+    0.044033465, 0.044033465, 0.044033465, 0.044033465, 0.044033465,
+    0.052840159]
+
+    # two stage cluster sampling `with replacement'
+    apiclus2 = copy(apiclus2_original)
+    dclus2 = SurveyDesign(apiclus2; clusters = [:dnum,:snum], weights=:pw) # cant pass popsize as Vector
+    @test dclus2.data[!,dclus2.weights][1] ≈ 1 / calculated_probs_R[1] atol = 1e-4
+    @test dclus2.data[!,dclus2.weights][25] ≈ 1 / calculated_probs_R[25] atol = 1e-4
+    @test dclus2.data[!,dclus2.weights][121] ≈ 1 / calculated_probs_R[121] atol = 1e-4
+    @test dclus2.data[!,dclus2.weights][125] ≈ 1 / calculated_probs_R[125] atol = 1e-4
+
+    # TODO: sampsize and popsize testing
+    ## NOT THE SAME AS R object right now
+    
+    #########################
+    ## Complete multistage sampling (when implemented) should look like
+    ## weights should theoretically be optional if both clusters and popsize given
+    # dclus2_complete = SurveyDesign(apiclus2; clusters = [:dnum,:snum], popsize=[:fpc1,:fpc2], {weights=:pw})
 end
 
 @testset "SurveyDesign_realSurveys" begin

From 266fad0c885514ebc1d10d080665daba08cada86 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 15 Jan 2023 12:30:33 +0530
Subject: [PATCH 38/80] bootstrap change :weights to design.weights

---
 src/bootstrap.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 83defc97..a5e9f019 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -4,18 +4,18 @@ julia> using Random
 
 julia> apiclus1 = load_data("apiclus1");
 
-julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum);
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, popsize=:fpc);
 
 julia> bootweights(clus_one_stage; replicates=1000, rng=MersenneTwister(111)) # choose a seed for deterministic results
 ReplicateDesign:
-data: 183×1046 DataFrame
+data: 183×1044 DataFrame
 strata: none
 cluster: dnum
     [637, 637, 637  …  448]
-popsize: [183, 183, 183  …  183]
+popsize: [757, 757, 757  …  757]
 sampsize: [15, 15, 15  …  15]
-weights: [1, 1, 1  …  1]
-probs: [1.0, 1.0, 1.0  …  1.0]
+weights: [50.4667, 50.4667, 50.4667  …  50.4667]
+allprobs: [0.0198, 0.0198, 0.0198  …  0.0198]
 replicates: 1000
 ```
 """
@@ -34,7 +34,7 @@ function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(
             rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
             for i in 1:nh
-                gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i].weights .* (nh / (nh - 1))
+                gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i][!,design.weights] .* (nh / (nh - 1))
             end            
             stratified[h].whij = transform(gdf).whij
             
@@ -47,5 +47,5 @@ function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(
     for i in 2:(replicates)
         df[!, "replicate_" * string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
-    return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
+    return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.weights, design.allprobs, design.pps, replicates) 
 end

From d59b9a48131b86a66ee7c0c79d28a8a79e6512de Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 15 Jan 2023 12:31:33 +0530
Subject: [PATCH 39/80] Change :weights to design.weights

---
 src/by.jl       | 2 +-
 src/hist.jl     | 2 +-
 src/mean.jl     | 6 +++---
 src/plot.jl     | 2 +-
 src/quantile.jl | 2 +-
 src/total.jl    | 2 +-
 test/plot.jl    | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/by.jl b/src/by.jl
index a4de2f55..cea2187d 100644
--- a/src/by.jl
+++ b/src/by.jl
@@ -1,7 +1,7 @@
 function bydomain(x::Symbol, domain::Symbol, design::ReplicateDesign, func::Function)
     gdf = groupby(design.data, domain)
     nd = length(unique(design.data[!, domain]))
-    X = combine(gdf, [x, :weights] => ((a, b) -> func(a, weights(b))) => :statistic)
+    X = combine(gdf, [x, design.weights] => ((a, b) -> func(a, weights(b))) => :statistic)
     Xt_mat = Array{Float64, 2}(undef, (nd, design.replicates))
     for i in 1:design.replicates
         Xt_mat[:, i] = combine(gdf, [x, Symbol("replicate_"*string(i))] => ((a, c) -> func(a, weights(c))) => :statistic).statistic
diff --git a/src/hist.jl b/src/hist.jl
index 17b54098..40935a1e 100644
--- a/src/hist.jl
+++ b/src/hist.jl
@@ -74,7 +74,7 @@ function hist(design::AbstractSurveyDesign, var::Symbol,
 				 kwargs...
     			)
 	hist = histogram(bins = bins, normalization = normalization, kwargs...)
-	data(design.data) * mapping(var, weights = :weights) * hist |> draw
+	data(design.data) * mapping(var, weights = design.weights) * hist |> draw
 end
 
 function hist(design::AbstractSurveyDesign, var::Symbol,
diff --git a/src/mean.jl b/src/mean.jl
index c1d80259..593b1d79 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -10,8 +10,8 @@ julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
 
 julia> mean(:api00, clus_one_stage)
 1×2 DataFrame
- Row │ mean     SE
-     │ Float64  Float64
+ Row │ mean     SE      
+     │ Float64  Float64 
 ─────┼──────────────────
    1 │ 644.169  23.2919
 
@@ -25,7 +25,7 @@ julia> mean([:api00, :enroll], clus_one_stage)
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
-    X = mean(design.data[!, x], weights(design.data.weights))
+    X = mean(design.data[!, x], weights(design.data[!,design.weights]))
     Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(mean = X, SE = sqrt(variance))
diff --git a/src/plot.jl b/src/plot.jl
index 7dd4f555..79f1b97d 100644
--- a/src/plot.jl
+++ b/src/plot.jl
@@ -16,5 +16,5 @@ save("scatter.png", s); nothing # hide
 ![](assets/scatter.png)
 """
 function plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
-    data(design.data) * mapping(x, y, markersize = :weights) * visual(Scatter, marker = '￮') |> draw
+    data(design.data) * mapping(x, y, markersize = design.weights) * visual(Scatter, marker = '￮') |> draw
 end
diff --git a/src/quantile.jl b/src/quantile.jl
index d4e399a5..09ba9326 100644
--- a/src/quantile.jl
+++ b/src/quantile.jl
@@ -37,7 +37,7 @@ julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
 function quantile(var::Symbol, design::SurveyDesign, p::Union{<:Real,Vector{<:Real}}; 
     alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
     v = design.data[!, var]
-    probs = design.data[!, :probs]
+    probs = design.data[!, design.allprobs]
     df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p))
     # TODO: Add CI and SE of the quantile
     return df
diff --git a/src/total.jl b/src/total.jl
index 0c5001e5..1b200797 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -25,7 +25,7 @@ julia> total([:api00, :enroll], clus_one_stage)
 ```
 """
 function total(x::Symbol, design::ReplicateDesign)
-    X = wsum(design.data[!, x], weights(design.data.weights))
+    X = wsum(design.data[!, x], weights(design.data[!,design.weights]))
     Xt = [wsum(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(total = X, SE = sqrt(variance))
diff --git a/test/plot.jl b/test/plot.jl
index c2476f65..7e31fc74 100644
--- a/test/plot.jl
+++ b/test/plot.jl
@@ -3,7 +3,7 @@
     apisrs = load_data("apisrs")
     srs = SurveyDesign(apisrs, weights=:pw)
     s = plot(srs, :api99, :api00)
-    @test s.grid[1].entries[1].named[:markersize] == srs.data.weights
+    @test s.grid[1].entries[1].named[:markersize] == srs.data[!,srs.weights]
     @test s.grid[1].entries[1].positional[1] == srs.data.api99
     @test s.grid[1].entries[1].positional[2] == srs.data.api00
     # StratifiedSample

From 7c4706cce57214552d1796296d258bd104a04546 Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 15 Jan 2023 12:32:15 +0530
Subject: [PATCH 40/80] Update ReplicateDesign struct and doctest

---
 src/SurveyDesign.jl  | 12 +++++++-----
 test/SurveyDesign.jl |  4 ++--
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index ff67f682..a08cd1d9 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -118,14 +118,14 @@ julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
 
 julia> bootstrat = bootweights(strat; replicates=1000)
 ReplicateDesign:
-data: 200×1046 DataFrame
+data: 200×1044 DataFrame
 strata: stype
     [E, E, E  …  H]
 cluster: none
-popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
-sampsize: [200, 200, 200  …  200]
-weights: [44.2, 44.2, 44.2  …  15.1]
-probs: [0.0226, 0.0226, 0.0226  …  0.0662]
+popsize: [4420.9999, 4420.9999, 4420.9999  …  755.0]
+sampsize: [100, 100, 100  …  50]
+weights: [44.21, 44.21, 44.21  …  15.1]
+allprobs: [0.0226, 0.0226, 0.0226  …  0.0662]
 replicates: 1000
 ```
 """
@@ -135,6 +135,8 @@ struct ReplicateDesign <: AbstractSurveyDesign
     popsize::Symbol
     sampsize::Symbol
     strata::Symbol
+    weights::Symbol # Effective weights in case of singlestage approx supported
+    allprobs::Symbol # Right now only singlestage approx supported
     pps::Bool
     replicates::UInt
 end
diff --git a/test/SurveyDesign.jl b/test/SurveyDesign.jl
index de74f2f4..89dff3bf 100644
--- a/test/SurveyDesign.jl
+++ b/test/SurveyDesign.jl
@@ -119,9 +119,9 @@ end
     @test dclus2.data[!,dclus2.weights][121] ≈ 1 / calculated_probs_R[121] atol = 1e-4
     @test dclus2.data[!,dclus2.weights][125] ≈ 1 / calculated_probs_R[125] atol = 1e-4
 
-    # TODO: sampsize and popsize testing
+    # TODO: sampsize and popsize testing once #178 resolved
     ## NOT THE SAME AS R object right now
-    
+
     #########################
     ## Complete multistage sampling (when implemented) should look like
     ## weights should theoretically be optional if both clusters and popsize given

From 7eeb1faa779a97e9f908da8fa80e187082b9077a Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Sun, 15 Jan 2023 12:38:16 +0530
Subject: [PATCH 41/80] Update show testing suite

---
 test/show.jl | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/test/show.jl b/test/show.jl
index f81ed2a3..1231a910 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -5,13 +5,13 @@
     srs = SurveyDesign(apisrs; weights=:pw)
     refstr = """
     SurveyDesign:
-    data: 200×47 DataFrame
+    data: 200×45 DataFrame
     strata: none
     cluster: none
-    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    popsize: [6194.0, 6194.0, 6194.0  …  6194.0]
     sampsize: [200, 200, 200  …  200]
-    weights: [31.0, 31.0, 31.0  …  31.0]
-    probs: [0.0323, 0.0323, 0.0323  …  0.0323]"""
+    weights: [30.97, 30.97, 30.97  …  30.97]
+    allprobs: [0.0323, 0.0323, 0.0323  …  0.0323]"""
 
     show(io, MIME("text/plain"), srs)
     str = String(take!(io))
@@ -20,13 +20,13 @@
     bsrs = srs |> bootweights
     refstrb = """
     ReplicateDesign:
-    data: 200×4047 DataFrame
+    data: 200×4045 DataFrame
     strata: none
     cluster: none
-    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    popsize: [6194.0, 6194.0, 6194.0  …  6194.0]
     sampsize: [200, 200, 200  …  200]
-    weights: [31.0, 31.0, 31.0  …  31.0]
-    probs: [0.0323, 0.0323, 0.0323  …  0.0323]
+    weights: [30.97, 30.97, 30.97  …  30.97]
+    allprobs: [0.0323, 0.0323, 0.0323  …  0.0323]
     replicates: 4000"""
 
     show(io, MIME("text/plain"), bsrs)
@@ -41,14 +41,14 @@ end
     strat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
     refstr = """
     SurveyDesign:
-    data: 200×46 DataFrame
+    data: 200×44 DataFrame
     strata: stype
         [E, E, E  …  H]
     cluster: none
-    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
-    sampsize: [200, 200, 200  …  200]
-    weights: [44.2, 44.2, 44.2  …  15.1]
-    probs: [0.0226, 0.0226, 0.0226  …  0.0662]"""
+    popsize: [4420.9999, 4420.9999, 4420.9999  …  755.0]
+    sampsize: [100, 100, 100  …  50]
+    weights: [44.21, 44.21, 44.21  …  15.1]
+    allprobs: [0.0226, 0.0226, 0.0226  …  0.0662]"""
 
     show(io, MIME("text/plain"), strat)
     str = String(take!(io))
@@ -57,14 +57,14 @@ end
     stratb = strat |> bootweights
     refstrb = """
     ReplicateDesign:
-    data: 200×4046 DataFrame
+    data: 200×4044 DataFrame
     strata: stype
         [E, E, E  …  H]
     cluster: none
-    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
-    sampsize: [200, 200, 200  …  200]
-    weights: [44.2, 44.2, 44.2  …  15.1]
-    probs: [0.0226, 0.0226, 0.0226  …  0.0662]
+    popsize: [4420.9999, 4420.9999, 4420.9999  …  755.0]
+    sampsize: [100, 100, 100  …  50]
+    weights: [44.21, 44.21, 44.21  …  15.1]
+    allprobs: [0.0226, 0.0226, 0.0226  …  0.0662]
     replicates: 4000"""
 
     show(io, MIME("text/plain"), stratb)
@@ -79,14 +79,14 @@ end
     clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
     refstr = """
     SurveyDesign:
-    data: 183×46 DataFrame
+    data: 183×44 DataFrame
     strata: none
     cluster: dnum
         [637, 637, 637  …  448]
-    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    popsize: [507.7049, 507.7049, 507.7049  …  507.7049]
     sampsize: [15, 15, 15  …  15]
-    weights: [33.8, 33.8, 33.8  …  33.8]
-    probs: [0.0295, 0.0295, 0.0295  …  0.0295]"""
+    weights: [33.847, 33.847, 33.847  …  33.847]
+    allprobs: [0.0295, 0.0295, 0.0295  …  0.0295]"""
 
     show(io, MIME("text/plain"), clus_one_stage)
     str = String(take!(io))
@@ -95,14 +95,14 @@ end
     clus_one_stageb = clus_one_stage |> bootweights
     refstrb = """
     ReplicateDesign:
-    data: 183×4046 DataFrame
+    data: 183×4044 DataFrame
     strata: none
     cluster: dnum
         [637, 637, 637  …  448]
-    popsize: [6190.0, 6190.0, 6190.0  …  6190.0]
+    popsize: [507.7049, 507.7049, 507.7049  …  507.7049]
     sampsize: [15, 15, 15  …  15]
-    weights: [33.8, 33.8, 33.8  …  33.8]
-    probs: [0.0295, 0.0295, 0.0295  …  0.0295]
+    weights: [33.847, 33.847, 33.847  …  33.847]
+    allprobs: [0.0295, 0.0295, 0.0295  …  0.0295]
     replicates: 4000"""
 
     show(io, MIME("text/plain"), clus_one_stageb)

From b79fce0419e476e96c56174cae3622fd1367221c Mon Sep 17 00:00:00 2001
From: smishr <43640926+smishr@users.noreply.github.com>
Date: Mon, 16 Jan 2023 13:23:37 +0530
Subject: [PATCH 42/80] Update .gitignore

accidentally pushed local gitignore

Co-authored-by: Ayush Patnaik <u6012645@anu.edu.au>
---
 .gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 494288f6..1d4d0304 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,4 @@
 /dev/*
 .gitignore
 .DS_Store
-*.json
-src/ht.jl
\ No newline at end of file
+*.json
\ No newline at end of file

From 5275be9a14ba6499fb30acd966fffabbeee2dffe Mon Sep 17 00:00:00 2001
From: smishr <sm_data@outlook.com>
Date: Mon, 16 Jan 2023 13:52:41 +0530
Subject: [PATCH 43/80] Append _ to :weights :popsize :sampsize

---
 src/SurveyDesign.jl | 10 +++++-----
 src/ratio.jl        |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index a08cd1d9..2ee5a9c3 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -76,14 +76,14 @@ struct SurveyDesign <: AbstractSurveyDesign
             cluster = clusters
         end
         # For single-stage approximation only one "effective" sampsize vector
-        sampsize_labels = :sampsize
-        if isa(strata,Symbol) && isnothing(clusters) # If stratified sample then sampsize is inside strata
+        sampsize_labels = :_sampsize
+        if isa(strata,Symbol) && isnothing(clusters) # If stratified only then sampsize is inside strata
             data[!, sampsize_labels] = transform(groupby(data, strata), nrow => :counts).counts
         else
             data[!, sampsize_labels] = fill(length(unique(data[!, cluster])), (nrow(data),))
         end
         if isa(popsize, Symbol)
-            weights_labels = :weights
+            weights_labels = :_weights
             data[!, weights_labels] = data[!, popsize] ./ data[!, sampsize_labels]
         elseif isa(weights, Symbol)
             if !(typeof(data[!, weights]) <: Vector{<:Real})
@@ -91,12 +91,12 @@ struct SurveyDesign <: AbstractSurveyDesign
             else
                 # derive popsize from given `weights`
                 weights_labels = weights
-                popsize = :popsize
+                popsize = :_popsize
                 data[!, popsize] = data[!, sampsize_labels] .* data[!, weights_labels]
             end
         else
             # neither popsize nor weights given
-            weights_labels = :weights
+            weights_labels = :_weights
             data[!, weights_labels] = repeat([1], nrow(data))
         end
         allprobs_labels = :allprobs
diff --git a/src/ratio.jl b/src/ratio.jl
index 1623eb3a..ebfef889 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -17,14 +17,14 @@ julia> ratio(:api00, :enroll, clus_one_stage)
 ```
 """
 function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign)
-    statistic = wsum(design.data[!,variable_num],design.data.weights)/wsum(design.data[!,variable_den],design.data.weights)
+    statistic = wsum(design.data[!,variable_num],design.data[!,design.weights])/wsum(design.data[!,variable_den],design.data[!,design.weights])
     nh = length(unique(design.data[!,design.cluster]))
     newv = []
     gdf = groupby(design.data, design.cluster)
     replicates = [filter(n -> n != i, 1:nh) for i in 1:nh] 
     for i in replicates
         df = DataFrame(gdf[i])
-        push!(newv, wsum(df[!,variable_num],df[!,:weights])/wsum(df[!,variable_den],df[!,:weights]))
+        push!(newv, wsum(df[!,variable_num],df[!,design.weights])/wsum(df[!,variable_den],df[!,design.weights]))
     end
     c = 0
     for i in 1:nh

From b675dd9003aba7125b6186a2df4b0f5941a6786f Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Wed, 18 Jan 2023 22:44:13 +0530
Subject: [PATCH 44/80] Add SE for quantile

---
 src/quantile.jl  | 55 ++++++++++++++++++++++++++++++------------------
 test/quantile.jl | 17 ++-------------
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/src/quantile.jl b/src/quantile.jl
index 09ba9326..81003e43 100644
--- a/src/quantile.jl
+++ b/src/quantile.jl
@@ -13,32 +13,47 @@ The Julia, R and Python-numpy use the same defaults
 ```jldoctest
 julia> apisrs = load_data("apisrs");
 
-julia> srs = SurveyDesign(apisrs; weights=:pw);
+julia> srs = SurveyDesign(apisrs; weights=:pw) |> bootweights; 
 
 julia> quantile(:api00,srs,0.5)
 1×2 DataFrame
- Row │ probability  quantile 
-     │ Float64      Float64  
-─────┼───────────────────────
-   1 │         0.5     659.0
-
-julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
-5×2 DataFrame
- Row │ probability  quantile 
-     │ Float64      Float64  
-─────┼───────────────────────
-   1 │        0.1      245.5
-   2 │        0.2      317.6
-   3 │        0.5      453.0
-   4 │        0.75     668.5
-   5 │        0.95    1473.1
+ Row │ 0.5th percentile  SE      
+     │ Float64           Float64 
+─────┼───────────────────────────
+   1 │            659.0  14.9764
 ```
 """
-function quantile(var::Symbol, design::SurveyDesign, p::Union{<:Real,Vector{<:Real}}; 
-    alpha::Float64=0.05, ci::Bool=false, se::Bool=false, qrule="hf7",kwargs...)
+function quantile(var::Symbol, design::ReplicateDesign, p::Real;kwargs...)
     v = design.data[!, var]
     probs = design.data[!, design.allprobs]
-    df = DataFrame(probability=p, quantile=Statistics.quantile(v, ProbabilityWeights(probs), p))
-    # TODO: Add CI and SE of the quantile
+    X = Statistics.quantile(v, ProbabilityWeights(probs), p)
+    Xt = [Statistics.quantile(v, ProbabilityWeights(design.data[! , "replicate_"*string(i)]), p) for i in 1:design.replicates]
+    variance = sum((Xt .- X).^2) / design.replicates
+    df = DataFrame(percentile = X, SE = sqrt(variance))
+    rename!(df, :percentile => string(p) * "th percentile")
     return df
+end
+
+"""
+```jldoctest
+julia> apisrs = load_data("apisrs");
+
+julia> srs = SurveyDesign(apisrs; weights=:pw) |> bootweights; 
+
+julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
+5×3 DataFrame
+ Row │ percentile  statistic  SE       
+     │ String      Float64    Float64  
+─────┼─────────────────────────────────
+   1 │ 0.1             245.5   20.2964
+   2 │ 0.2             317.6   13.5435
+   3 │ 0.5             453.0   24.9719
+   4 │ 0.75            668.5   34.2487
+   5 │ 0.95           1473.1  142.568
+```
+"""
+function quantile(var::Symbol, design::ReplicateDesign, probs::Vector{<:Real}; kwargs...)
+    df = vcat([rename!(quantile(var, design, prob; kwargs...),[:statistic, :SE]) for prob in probs]...)
+    df.percentile = string.(probs)
+    return df[!, [:percentile, :statistic, :SE]]
 end
\ No newline at end of file
diff --git a/test/quantile.jl b/test/quantile.jl
index 59bb0a69..cca58da0 100644
--- a/test/quantile.jl
+++ b/test/quantile.jl
@@ -6,21 +6,8 @@
     apisrs_original[!, :derived_sampsize] = fill(200.0, size(apisrs_original, 1))
     ##############################
     apisrs = copy(apisrs_original)
-    srs_design = SurveyDesign(apisrs; weights=:pw) 
-    @test quantile(:api00, srs_design, 0.5)[!,2][1] ≈ 659.0 atol=1e-4
+    srs_design = SurveyDesign(apisrs; weights=:pw) |> bootweights
+    @test quantile(:api00, srs_design, 0.5)[!,1][1] ≈ 659.0 atol=1e-4
     @test quantile(:api00, srs_design, [0.1753,0.25,0.5,0.75,0.975])[!,2] ≈ [512.8847,544,659,752.5,905] atol = 1e-4
     @test quantile(:enroll,srs_design, [0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [245.5,317.6,453.0,668.5,1473.1] atol = 1e-4 
-end
-
-@testset "quantile_Stratified" begin
-    ##### StratifiedSample tests
-    # Load API datasets
-    apistrat_original = load_data("apistrat")
-    apistrat_original[!, :derived_probs] = 1 ./ apistrat_original.pw
-    apistrat_original[!, :derived_sampsize] = apistrat_original.fpc ./ apistrat_original.pw
-    # base functionality
-    apistrat = copy(apistrat_original)
-    dstrat = SurveyDesign(apistrat; strata = :stype, popsize = :fpc)
-    # Check which definition of quantile for StratifiedSample
-    # @test quantile(:enroll, dstrat, [0.1,0.2,0.5,0.75,0.95])[!,2] ≈ [262,309.3366,446.4103,658.8764,1589.7881] atol = 1e-4 
 end
\ No newline at end of file

From 8bb8bfd9db924394471570b99d331b6e77a291a5 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 19 Jan 2023 14:55:59 +0530
Subject: [PATCH 45/80] Remove jackknife and use bootstrap in ratio
 estimation.

---
 src/Survey.jl     |  1 -
 src/jackknife.jl  | 16 ----------------
 src/ratio.jl      | 33 +++++++++++----------------------
 test/jackknife.jl |  8 --------
 test/ratio.jl     |  6 +++---
 5 files changed, 14 insertions(+), 50 deletions(-)
 delete mode 100644 src/jackknife.jl
 delete mode 100644 test/jackknife.jl

diff --git a/src/Survey.jl b/src/Survey.jl
index f25e33a7..66de8042 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -17,7 +17,6 @@ include("SurveyDesign.jl")
 include("bootstrap.jl")
 include("mean.jl")
 include("quantile.jl")
-include("jackknife.jl")
 include("total.jl")
 include("load_data.jl")
 include("hist.jl")
diff --git a/src/jackknife.jl b/src/jackknife.jl
deleted file mode 100644
index 55880df9..00000000
--- a/src/jackknife.jl
+++ /dev/null
@@ -1,16 +0,0 @@
-function jkknife(variable:: Symbol, design::SurveyDesign ,func:: Function;  params =[])
-    statistic = func(design.data[!,variable],params...)
-    nh = length(unique(design.data[!,design.cluster]))
-    newv = []
-    gdf = groupby(design.data, design.cluster)
-    replicates = [filter(n -> n != i, 1:nh) for i in 1:nh] 
-    for i in replicates
-        push!(newv,func(DataFrame(gdf[i])[!,variable]))
-    end
-    c = 0
-    for i in 1:nh
-        c = c+(newv[i]-statistic)^2
-    end
-    var = c*(nh-1)/nh
-    return DataFrame(Statistic = statistic, SE = sqrt(var))
-end
diff --git a/src/ratio.jl b/src/ratio.jl
index ebfef889..8a0226f3 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -6,30 +6,19 @@ Estimate the ratio of the columns specified in numerator and denominator
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
+julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> ratio(:api00, :enroll, clus_one_stage)
+ratio(:api00, :enroll, clus_one_stage)
 1×2 DataFrame
- Row │ Statistic  SE       
-     │ Float64    Float64  
-─────┼─────────────────────
-   1 │   1.17182  0.151242
+ Row │ ratio    SE       
+     │ Float64  Float64  
+─────┼───────────────────
+   1 │ 1.17182  0.130834
 ```
 """
-function ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign)
-    statistic = wsum(design.data[!,variable_num],design.data[!,design.weights])/wsum(design.data[!,variable_den],design.data[!,design.weights])
-    nh = length(unique(design.data[!,design.cluster]))
-    newv = []
-    gdf = groupby(design.data, design.cluster)
-    replicates = [filter(n -> n != i, 1:nh) for i in 1:nh] 
-    for i in replicates
-        df = DataFrame(gdf[i])
-        push!(newv, wsum(df[!,variable_num],df[!,design.weights])/wsum(df[!,variable_den],df[!,design.weights]))
-    end
-    c = 0
-    for i in 1:nh
-        c = c+(newv[i]-statistic)^2
-    end
-    var = c*(nh-1)/nh
-    return DataFrame(Statistic = statistic, SE = sqrt(var))
+function ratio(variable_num::Symbol, variable_den::Symbol, design::ReplicateDesign)
+    X = wsum(design.data[!, variable_num], design.data[!, design.weights]) / wsum(design.data[!, variable_den], design.data[!, design.weights])
+    Xt = [(wsum(design.data[!, variable_num], weights(design.data[! , "replicate_"*string(i)]))) / (wsum(design.data[!, variable_den], weights(design.data[! , "replicate_"*string(i)]))) for i in 1:design.replicates]
+    variance = sum((Xt .- X).^2) / design.replicates
+    DataFrame(ratio = X, SE = sqrt(variance))
 end
diff --git a/test/jackknife.jl b/test/jackknife.jl
deleted file mode 100644
index 25e35e91..00000000
--- a/test/jackknife.jl
+++ /dev/null
@@ -1,8 +0,0 @@
-@testset "jackknife.jl" begin
-    apiclus1_original = load_data("apiclus1")
-    apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
-    apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights=:pw);
-    @test jkknife(:api00, dclus1, mean).SE[1] ≈ 26.5997 atol = 1e-4
-    @test jkknife(:api00, dclus1, mean).Statistic[1] ≈ 644.1693 atol = 1e-4
-end
diff --git a/test/ratio.jl b/test/ratio.jl
index b8652ef1..9185952c 100644
--- a/test/ratio.jl
+++ b/test/ratio.jl
@@ -4,7 +4,7 @@
     ##############################
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize = :fpc)
-    @test ratio(:api00, :enroll, dclus1).SE[1] ≈ 0.151242 atol = 1e-4
-    @test ratio(:api00, :enroll, dclus1).Statistic[1] ≈ 1.17182 atol = 1e-4
+    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, popsize = :fpc) |> bootweights
+    @test ratio(:api00, :enroll, dclus1).SE[1] ≈ 0.1275446 atol = 1e-1
+    @test ratio(:api00, :enroll, dclus1).ratio[1] ≈ 1.17182 atol = 1e-4
 end
\ No newline at end of file

From b95d30af5d5ff7b25d0b87b3dc7e44d21d53c6e0 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 19 Jan 2023 15:12:32 +0530
Subject: [PATCH 46/80] Add warning to tell users that single stage
 approximation is used.

---
 src/SurveyDesign.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 2ee5a9c3..71ee1356 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -70,6 +70,7 @@ struct SurveyDesign <: AbstractSurveyDesign
         end
         ## Single stage approximation
         if typeof(clusters) <: Vector{Symbol}
+            @warn As part of single-stage approximation, only the first stage cluster ID is retained. 
             cluster = first(clusters)
         end
         if typeof(clusters) <: Symbol

From e9530e195d87131a32f37d314c8f406902308d54 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 19 Jan 2023 15:42:17 +0530
Subject: [PATCH 47/80] Fix syntax.

---
 src/SurveyDesign.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 71ee1356..fc8030f1 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -70,7 +70,7 @@ struct SurveyDesign <: AbstractSurveyDesign
         end
         ## Single stage approximation
         if typeof(clusters) <: Vector{Symbol}
-            @warn As part of single-stage approximation, only the first stage cluster ID is retained. 
+            @warn "As part of single-stage approximation, only the first stage cluster ID is retained." 
             cluster = first(clusters)
         end
         if typeof(clusters) <: Symbol

From 4e5e50a216a15c53fd52ecdf0dd4b5ebd1af5ee3 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Fri, 20 Jan 2023 16:28:59 +0530
Subject: [PATCH 48/80] Speed up bootstrap

---
 src/bootstrap.jl | 45 ++++++++++++++++++++-------------------------
 src/mean.jl      | 32 ++++++++++++++++----------------
 src/total.jl     | 28 ++++++++++++++--------------
 test/mean.jl     | 41 ++++++++++++++++++++++++-----------------
 test/total.jl    |  6 ------
 5 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index a5e9f019..630e98b8 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -4,14 +4,16 @@ julia> using Random
 
 julia> apiclus1 = load_data("apiclus1");
 
+
 julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, popsize=:fpc);
 
+
 julia> bootweights(clus_one_stage; replicates=1000, rng=MersenneTwister(111)) # choose a seed for deterministic results
 ReplicateDesign:
 data: 183×1044 DataFrame
 strata: none
 cluster: dnum
-    [637, 637, 637  …  448]
+    [61, 61, 61  …  815]
 popsize: [757, 757, 757  …  757]
 sampsize: [15, 15, 15  …  15]
 weights: [50.4667, 50.4667, 50.4667  …  50.4667]
@@ -20,32 +22,25 @@ replicates: 1000
 ```
 """
 function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(1234))
-    H = length(unique(design.data[!, design.strata]))
     stratified = groupby(design.data, design.strata)
-    function replicate(stratified, H)
-        for h in 1:H
-            substrata = DataFrame(stratified[h])
-            psus = unique(substrata[!, design.cluster])
-            if length(psus) <= 1
-                stratified[h].whij .= 0 # hasn't been tested yet. 
+    H = length(keys(stratified))
+    substrata_dfs = []
+    for h in 1:H
+        substrata = DataFrame(stratified[h])
+        cluster_sorted = sort(substrata, design.cluster)
+        psus = unique(cluster_sorted[!, design.cluster])
+        npsus = [(count(==(i), cluster_sorted[!, design.cluster])) for i in psus]
+        nh = length(psus)
+        randinds = rand(rng, 1:(nh), replicates, (nh-1))
+        for replicate in 1:replicates
+            rh = zeros(Int, nh)
+            for i in randinds[replicate, :]
+                rh[i] += 1
             end
-            nh = length(psus)
-            randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  
-            rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
-            gdf = groupby(substrata, design.cluster)
-            for i in 1:nh
-                gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i][!,design.weights] .* (nh / (nh - 1))
-            end            
-            stratified[h].whij = transform(gdf).whij
-            
-        end
-        return transform(stratified, :whij)
+            cluster_sorted[!, "replicate_" * string(replicate)] = vcat([repeat([rh[i] * (nh / (nh-1))], npsus[i]) for i in 1:length(rh)]...) .* cluster_sorted[!, design.weights] 
+        end   
+        push!(substrata_dfs, cluster_sorted)
     end
-    df = replicate(stratified, H)
-    rename!(df, :whij => :replicate_1)
-    df.replicate_1 = disallowmissing(df.replicate_1)
-    for i in 2:(replicates)
-        df[!, "replicate_" * string(i)] = disallowmissing(replicate(stratified, H).whij)
-    end 
+    df = vcat(substrata_dfs...)
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.weights, design.allprobs, design.pps, replicates) 
 end
diff --git a/src/mean.jl b/src/mean.jl
index 593b1d79..5b87ffdf 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -10,18 +10,18 @@ julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
 
 julia> mean(:api00, clus_one_stage)
 1×2 DataFrame
- Row │ mean     SE      
-     │ Float64  Float64 
+ Row │ mean     SE
+     │ Float64  Float64
 ─────┼──────────────────
-   1 │ 644.169  23.2919
+   1 │ 644.169  23.2877
 
 julia> mean([:api00, :enroll], clus_one_stage)
 2×3 DataFrame
  Row │ names   mean     SE
      │ String  Float64  Float64
 ─────┼──────────────────────────
-   1 │ api00   644.169  23.2919
-   2 │ enroll  549.716  45.3655
+   1 │ api00   644.169  23.2877
+   2 │ enroll  549.716  46.2597
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
@@ -52,17 +52,17 @@ julia> mean(:api00, :cname, clus_one_stage)
  Row │ cname        mean     SE
      │ String15     Float64  Any
 ─────┼───────────────────────────────────
-   1 │ Alameda      669.0    1.27388e-13
-   2 │ Fresno       472.0    1.13687e-13
-   3 │ Kern         452.5    0.0
-   4 │ Los Angeles  647.267  47.4938
-   5 │ Mendocino    623.25   1.0931e-13
-   6 │ Merced       519.25   4.57038e-15
-   7 │ Orange       710.563  2.19684e-13
-   8 │ Plumas       709.556  1.27773e-13
-   9 │ San Diego    659.436  2.63446
-  10 │ San Joaquin  551.189  2.17471e-13
-  11 │ Santa Clara  732.077  56.2584
+   1 │ Santa Clara  732.077  59.6794
+   2 │ San Diego    659.436  2.63657
+   3 │ Merced       519.25   8.18989e-15
+   4 │ Los Angeles  647.267  47.7685
+   5 │ Orange       710.563  2.21461e-13
+   6 │ Fresno       472.0    1.13687e-13
+   7 │ Plumas       709.556  1.26823e-13
+   8 │ Alameda      669.0    1.26888e-13
+   9 │ San Joaquin  551.189  2.17297e-13
+  10 │ Kern         452.5    0.0
+  11 │ Mendocino    623.25   1.09409e-13
 ```
 """
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
diff --git a/src/total.jl b/src/total.jl
index 1b200797..f6ceb823 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -13,15 +13,15 @@ julia> total(:api00, clus_one_stage)
  Row │ total      SE
      │ Float64    Float64
 ─────┼──────────────────────
-   1 │ 3.98999e6  9.22175e5
+   1 │ 3.98999e6  9.10443e5
 
 julia> total([:api00, :enroll], clus_one_stage)
 2×3 DataFrame
  Row │ names   total      SE
      │ String  Float64    Float64
 ─────┼──────────────────────────────
-   1 │ api00   3.98999e6  9.22175e5
-   2 │ enroll  3.40494e6  9.51557e5
+   1 │ api00   3.98999e6  9.10443e5
+   2 │ enroll  3.40494e6  9.47987e5
 ```
 """
 function total(x::Symbol, design::ReplicateDesign)
@@ -52,17 +52,17 @@ julia> total(:api00, :cname, clus_one_stage)
  Row │ cname        total           SE
      │ String15     Float64         Any
 ─────┼────────────────────────────────────────
-   1 │ Alameda      249080.0        2.48842e5
-   2 │ Fresno        63903.1        64452.2
-   3 │ Kern          30631.5        31083.0
-   4 │ Los Angeles       3.2862e5   2.93649e5
-   5 │ Mendocino     84380.6        83154.4
-   6 │ Merced        70300.2        69272.5
-   7 │ Orange            3.84807e5  3.90097e5
-   8 │ Plumas            2.16147e5  2.17811e5
-   9 │ San Diego         1.2276e6   8.78559e5
-  10 │ San Joaquin       6.90276e5  6.90685e5
-  11 │ Santa Clara       6.44244e5  4.09943e5
+   1 │ Santa Clara       6.44244e5  4.29558e5
+   2 │ San Diego         1.2276e6   8.60246e5
+   3 │ Merced        70300.2        70757.4
+   4 │ Los Angeles       3.2862e5   2.95688e5
+   5 │ Orange            3.84807e5  3.77128e5
+   6 │ Fresno        63903.1        64455.2
+   7 │ Plumas            2.16147e5  2.12279e5
+   8 │ Alameda      249080.0        2.5221e5
+   9 │ San Joaquin       6.90276e5  6.92353e5
+  10 │ Kern          30631.5        30333.5
+  11 │ Mendocino     84380.6        80774.4
 ```
 """
 function total(x::Symbol, domain::Symbol, design::ReplicateDesign)
diff --git a/test/mean.jl b/test/mean.jl
index 4745125b..2e9682a8 100644
--- a/test/mean.jl
+++ b/test/mean.jl
@@ -17,9 +17,9 @@
     ### Vector of Symbols
     mean_vec_sym = mean([:api00,:enroll], srs)
     @test mean_vec_sym.mean[1] ≈ 656.585 atol = 1e-4
-    @test mean_vec_sym.SE[1] ≈ 9.3065 atol = 1e-2
+    @test mean_vec_sym.SE[1] ≈ 9.3065 rtol = 1e-1
     @test mean_vec_sym.mean[2] ≈ 584.61 atol = 1e-4
-    @test mean_vec_sym.SE[2] ≈ 28.1048 atol = 1e-2
+    @test mean_vec_sym.SE[2] ≈ 28.1048 rtol = 1e-1
     ##############################
     ### Categorical Array - estimating proportions
     # apisrs_categ = copy(apisrs_original)
@@ -35,7 +35,7 @@ end
     apistrat = copy(apistrat_original)
     strat = SurveyDesign(apistrat, strata = :stype, weights = :pw) |> bootweights
     mean_strat = mean(:api00, strat)
-    @test mean_strat.mean[1] ≈ 662.29 atol = 1e-2
+    @test mean_strat.mean[1] ≈ 662.29 rtol = 1e-1
     @test mean_strat.SE[1] ≈ 9.48296 atol = 1e-1
 end
 
@@ -44,12 +44,12 @@ end
     apisrs = copy(apisrs_original)
     srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
     mean_symb_srs = mean(:api00, :stype, srs)
-    @test mean_symb_srs.mean[1] ≈ 605.36 atol = 1e-2
-    @test mean_symb_srs.mean[2] ≈ 666.141 atol = 1e-2
-    @test mean_symb_srs.mean[3] ≈ 654.273 atol = 1e-2
-    @test mean_symb_srs.SE[1] ≈ 22.6718 atol = 1e-2
-    @test mean_symb_srs.SE[2] ≈ 11.35390 atol = 1e-2
-    @test mean_symb_srs.SE[3] ≈ 22.3298 atol = 1e-2
+    @test mean_symb_srs.mean[1] ≈ 605.36 rtol = 1e-1
+    @test mean_symb_srs.mean[2] ≈ 666.141 rtol = 1e-1
+    @test mean_symb_srs.mean[3] ≈ 654.273 rtol = 1e-1
+    @test mean_symb_srs.SE[1] ≈ 22.6718 rtol = 1e-1
+    @test mean_symb_srs.SE[2] ≈ 11.35390 rtol = 1e-1
+    @test mean_symb_srs.SE[3] ≈ 22.3298 rtol = 1e-1
 end
 
 @testset "mean_svyby_Stratified" begin
@@ -57,12 +57,12 @@ end
     apistrat = copy(apistrat_original)
     strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
     mean_strat_symb = mean(:api00, :stype, strat)
-    @test mean_strat_symb.mean[1] ≈ 674.43 atol = 1e-2
-    @test mean_strat_symb.mean[2] ≈ 636.6 atol = 1e-2
-    @test mean_strat_symb.mean[3] ≈ 625.82 atol = 1e-2
-    @test mean_strat_symb.SE[1] ≈ 12.4398 atol = 1e-2
-    @test mean_strat_symb.SE[2] ≈ 16.5628 atol = 1e-2
-    @test mean_strat_symb.SE[3] ≈ 15.42320 atol = 1e-2
+    @test mean_strat_symb.mean[1] ≈ 674.43 rtol = 1e-1
+    @test mean_strat_symb.mean[2] ≈ 636.6 rtol = 1e-1
+    @test mean_strat_symb.mean[3] ≈ 625.82 rtol = 1e-1
+    @test mean_strat_symb.SE[1] ≈ 12.4398 rtol = 1e-1
+    @test mean_strat_symb.SE[2] ≈ 16.5628 rtol = 1e-1
+    @test mean_strat_symb.SE[3] ≈ 15.42320 rtol = 1e-1
 end
 
 @testset "mean_OneStageCluster" begin
@@ -73,6 +73,13 @@ end
     # one-stage cluster sample
     apiclus1 = copy(apiclus1_original)
     dclus1 = SurveyDesign(apiclus1; clusters =  :dnum, weights = :pw) |> bootweights 
-    @test mean(:api00, dclus1).mean[1] ≈ 644.17 atol = 1e-2
-    @test mean(:api00, dclus1).SE[1] ≈  23.291 atol = 1e-2 # without fpc as it hasn't been figured out for bootstrap. 
+    @test mean(:api00, dclus1).mean[1] ≈ 644.17 rtol = 1e-1
+    @test mean(:api00, dclus1).SE[1] ≈  23.291 rtol = 1e-1 # without fpc as it hasn't been figured out for bootstrap. 
+
+    mn = mean(:api00, :cname, dclus1)
+    @test size(mn)[1] == apiclus1.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 647.2667 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 41.537132 rtol = 1 # tolerance is too large
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 732.0769 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 54.215099 rtol = SE_TOL
 end
diff --git a/test/total.jl b/test/total.jl
index 6ac6ab06..51fb2f00 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -173,12 +173,6 @@ end
     @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 292840.83 rtol = SE_TOL
     @test filter(:cname => ==("San Diego"), tot).total[1] ≈ 1227596.71 rtol = STAT_TOL
     @test filter(:cname => ==("San Diego"), tot).SE[1] ≈ 860028.39 rtol = SE_TOL
-    mn = mean(:api00, :cname, clus1)
-    @test size(mn)[1] == apiclus1.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 647.2667 rtol = STAT_TOL
-    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 41.537132 rtol = 1 # tolerance is too large
-    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 732.0769 rtol = STAT_TOL
-    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 52.336574 rtol = SE_TOL
     # equivalent R code (results cause clutter):
     # > svyby(~api00, ~cname, clus1rep, svytotal)
     # > svyby(~api00, ~cname, clus1rep, svymean)

From f084eb2d89eb5bc26c014c7fa5351985788ec138 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Fri, 20 Jan 2023 17:39:23 +0530
Subject: [PATCH 49/80] Add ratio doctest changes.

---
 src/ratio.jl | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/ratio.jl b/src/ratio.jl
index 8a0226f3..c5c897b0 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -8,12 +8,13 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-ratio(:api00, :enroll, clus_one_stage)
+julia> ratio(:api00, :enroll, clus_one_stage)
 1×2 DataFrame
- Row │ ratio    SE       
-     │ Float64  Float64  
+ Row │ ratio    SE
+     │ Float64  Float64
 ─────┼───────────────────
-   1 │ 1.17182  0.130834
+   1 │ 1.17182  0.133361
+
 ```
 """
 function ratio(variable_num::Symbol, variable_den::Symbol, design::ReplicateDesign)

From 4d25a36efb8138e022d2b80696b3e2b75856756b Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Fri, 20 Jan 2023 17:39:40 +0530
Subject: [PATCH 50/80] Add commented code for fixing doctests.

---
 docs/make.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/make.jl b/docs/make.jl
index c79915e1..ba49de19 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -7,6 +7,7 @@ DocMeta.setdocmeta!(Survey, :DocTestSetup, :(using Survey); recursive=true)
 makedocs(;
     modules=[Survey],
     authors="xKDR Forum",
+    # doctest = :fix, 
     repo="https://github.com/xKDR/Survey.jl/blob/{commit}{path}#{line}",
     sitename="$Survey.jl",
     format=Documenter.HTML(;

From b1718c016c46e2eac132e2de699782cef29980bd Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Fri, 20 Jan 2023 18:06:27 +0530
Subject: [PATCH 51/80] Use fill instead of repeat

---
 src/bootstrap.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 630e98b8..db3bb96f 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -36,8 +36,8 @@ function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(
             rh = zeros(Int, nh)
             for i in randinds[replicate, :]
                 rh[i] += 1
-            end
-            cluster_sorted[!, "replicate_" * string(replicate)] = vcat([repeat([rh[i] * (nh / (nh-1))], npsus[i]) for i in 1:length(rh)]...) .* cluster_sorted[!, design.weights] 
+            end            
+            cluster_sorted[!, "replicate_" * string(replicate)] = vcat([fill(rh[i] * (nh / (nh-1)), npsus[i]) for i in 1:length(rh)]...) .* cluster_sorted[!, design.weights] 
         end   
         push!(substrata_dfs, cluster_sorted)
     end

From 7fa8f5418e2a57f7f20d132ea04555939c2ad060 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Fri, 20 Jan 2023 18:06:31 +0530
Subject: [PATCH 52/80] Fix test.

---
 test/show.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/show.jl b/test/show.jl
index 1231a910..05ae7a1f 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -98,7 +98,7 @@ end
     data: 183×4044 DataFrame
     strata: none
     cluster: dnum
-        [637, 637, 637  …  448]
+        [61, 61, 61  …  815]
     popsize: [507.7049, 507.7049, 507.7049  …  507.7049]
     sampsize: [15, 15, 15  …  15]
     weights: [33.847, 33.847, 33.847  …  33.847]

From c60a95adf3bdd82d2c8903a2aa17334acef779a8 Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Sun, 22 Jan 2023 13:16:23 +0530
Subject: [PATCH 53/80] Further speed up bootstrap algo by 2x.

---
 src/bootstrap.jl |  9 +++------
 src/mean.jl      | 24 ++++++++++++------------
 src/ratio.jl     |  5 +++--
 src/total.jl     | 28 ++++++++++++++--------------
 4 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index db3bb96f..c57649a8 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -31,13 +31,10 @@ function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(
         psus = unique(cluster_sorted[!, design.cluster])
         npsus = [(count(==(i), cluster_sorted[!, design.cluster])) for i in psus]
         nh = length(psus)
-        randinds = rand(rng, 1:(nh), replicates, (nh-1))
+        cluster_weights = cluster_sorted[!, design.weights] 
         for replicate in 1:replicates
-            rh = zeros(Int, nh)
-            for i in randinds[replicate, :]
-                rh[i] += 1
-            end            
-            cluster_sorted[!, "replicate_" * string(replicate)] = vcat([fill(rh[i] * (nh / (nh-1)), npsus[i]) for i in 1:length(rh)]...) .* cluster_sorted[!, design.weights] 
+            randinds = rand(rng, 1:(nh), (nh-1))          
+            cluster_sorted[!, "replicate_" * string(replicate)] = vcat([fill((count(==(i), randinds)) * (nh / (nh-1)), npsus[i]) for i in 1:nh]...) .* cluster_weights
         end   
         push!(substrata_dfs, cluster_sorted)
     end
diff --git a/src/mean.jl b/src/mean.jl
index 5b87ffdf..51de1b74 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -13,15 +13,15 @@ julia> mean(:api00, clus_one_stage)
  Row │ mean     SE
      │ Float64  Float64
 ─────┼──────────────────
-   1 │ 644.169  23.2877
+   1 │ 644.169  23.4107
 
 julia> mean([:api00, :enroll], clus_one_stage)
 2×3 DataFrame
  Row │ names   mean     SE
      │ String  Float64  Float64
 ─────┼──────────────────────────
-   1 │ api00   644.169  23.2877
-   2 │ enroll  549.716  46.2597
+   1 │ api00   644.169  23.4107
+   2 │ enroll  549.716  45.7835
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
@@ -52,17 +52,17 @@ julia> mean(:api00, :cname, clus_one_stage)
  Row │ cname        mean     SE
      │ String15     Float64  Any
 ─────┼───────────────────────────────────
-   1 │ Santa Clara  732.077  59.6794
-   2 │ San Diego    659.436  2.63657
-   3 │ Merced       519.25   8.18989e-15
-   4 │ Los Angeles  647.267  47.7685
-   5 │ Orange       710.563  2.21461e-13
+   1 │ Santa Clara  732.077  58.2169
+   2 │ San Diego    659.436  2.66703
+   3 │ Merced       519.25   2.28936e-15
+   4 │ Los Angeles  647.267  47.6233
+   5 │ Orange       710.563  2.19826e-13
    6 │ Fresno       472.0    1.13687e-13
-   7 │ Plumas       709.556  1.26823e-13
-   8 │ Alameda      669.0    1.26888e-13
-   9 │ San Joaquin  551.189  2.17297e-13
+   7 │ Plumas       709.556  1.26058e-13
+   8 │ Alameda      669.0    1.27527e-13
+   9 │ San Joaquin  551.189  2.1791e-13
   10 │ Kern         452.5    0.0
-  11 │ Mendocino    623.25   1.09409e-13
+  11 │ Mendocino    623.25   1.09545e-13
 ```
 """
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
diff --git a/src/ratio.jl b/src/ratio.jl
index bc72d2f7..4a7385ac 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -8,12 +8,13 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-ratio(:api00, :enroll, clus_one_stage)
+julia> ratio(:api00, :enroll, clus_one_stage)
 1×2 DataFrame
  Row │ ratio    SE
      │ Float64  Float64
 ─────┼───────────────────
-   1 │ 1.17182  0.133361
+   1 │ 1.17182  0.131518
+
 ```
 """
 function ratio(variable_num::Symbol, variable_den::Symbol, design::ReplicateDesign)
diff --git a/src/total.jl b/src/total.jl
index f6ceb823..878d2635 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -13,15 +13,15 @@ julia> total(:api00, clus_one_stage)
  Row │ total      SE
      │ Float64    Float64
 ─────┼──────────────────────
-   1 │ 3.98999e6  9.10443e5
+   1 │ 3.98999e6  9.01611e5
 
 julia> total([:api00, :enroll], clus_one_stage)
 2×3 DataFrame
  Row │ names   total      SE
      │ String  Float64    Float64
 ─────┼──────────────────────────────
-   1 │ api00   3.98999e6  9.10443e5
-   2 │ enroll  3.40494e6  9.47987e5
+   1 │ api00   3.98999e6  9.01611e5
+   2 │ enroll  3.40494e6  9.33396e5
 ```
 """
 function total(x::Symbol, design::ReplicateDesign)
@@ -52,17 +52,17 @@ julia> total(:api00, :cname, clus_one_stage)
  Row │ cname        total           SE
      │ String15     Float64         Any
 ─────┼────────────────────────────────────────
-   1 │ Santa Clara       6.44244e5  4.29558e5
-   2 │ San Diego         1.2276e6   8.60246e5
-   3 │ Merced        70300.2        70757.4
-   4 │ Los Angeles       3.2862e5   2.95688e5
-   5 │ Orange            3.84807e5  3.77128e5
-   6 │ Fresno        63903.1        64455.2
-   7 │ Plumas            2.16147e5  2.12279e5
-   8 │ Alameda      249080.0        2.5221e5
-   9 │ San Joaquin       6.90276e5  6.92353e5
-  10 │ Kern          30631.5        30333.5
-  11 │ Mendocino     84380.6        80774.4
+   1 │ Santa Clara       6.44244e5  4.2273e5
+   2 │ San Diego         1.2276e6   8.62727e5
+   3 │ Merced        70300.2        71336.3
+   4 │ Los Angeles       3.2862e5   2.93936e5
+   5 │ Orange            3.84807e5  3.88014e5
+   6 │ Fresno        63903.1        64781.7
+   7 │ Plumas            2.16147e5  2.12089e5
+   8 │ Alameda      249080.0        2.49228e5
+   9 │ San Joaquin       6.90276e5  6.81604e5
+  10 │ Kern          30631.5        30870.3
+  11 │ Mendocino     84380.6        80215.9
 ```
 """
 function total(x::Symbol, domain::Symbol, design::ReplicateDesign)

From 7290feee8b62a2303ad2cadf74ee4eba6d8a91d1 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Thu, 26 Jan 2023 19:48:15 +0200
Subject: [PATCH 54/80] Restructure documentation, finish tutorial

---
 docs/make.jl                |   6 +-
 docs/src/api.md             |   2 +-
 docs/src/getting_started.md | 203 ++++++++++++++++++++++++++++++++++++
 docs/src/index.md           |  28 +----
 docs/src/manual.md          |   1 +
 5 files changed, 212 insertions(+), 28 deletions(-)
 create mode 100644 docs/src/getting_started.md
 create mode 100644 docs/src/manual.md

diff --git a/docs/make.jl b/docs/make.jl
index ba49de19..2f6535ca 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -17,6 +17,8 @@ makedocs(;
     ),
     pages=[
         "Home" => "index.md",
+        "Getting Started" => "getting_started.md",
+        "Manual" => "manual.md",
         "Moving from R" => "R_comparison.md",
         "API reference" => "api.md"
     ],
@@ -25,6 +27,6 @@ makedocs(;
 
 deploydocs(;
     repo="github.com/xKDR/Survey.jl",
-    target = "build",
-    devbranch="main"
+    target="build",
+    devbranch="main",
 )
diff --git a/docs/src/api.md b/docs/src/api.md
index 5b538a55..0890ab65 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -19,7 +19,7 @@ mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
 total(x::Symbol, design::ReplicateDesign)
 total(x::Symbol, domain::Symbol, design::ReplicateDesign)
 quantile
-ratio(variable_num::Symbol, variable_den::Symbol, design::SurveyDesign)
+ratio
 plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 hist(design::AbstractSurveyDesign, var::Symbol,
diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
new file mode 100644
index 00000000..54b97243
--- /dev/null
+++ b/docs/src/getting_started.md
@@ -0,0 +1,203 @@
+## Installation
+
+The `Survey.jl` package is not yet registered. For now, installation of the package
+is done using the following command:
+
+```julia
+]  add "https://github.com/xKDR/Survey.jl.git"
+```
+
+After registration, the regular `Pkg` commands can be used for installing the package:
+
+```julia
+julia> using Pkg
+
+julia> Pkg.add("Survey")
+```
+
+```julia
+julia> ]  add Survey
+```
+
+## Tutorial
+
+This tutorial assumes basic knowledge of statistics and survey analysis.
+
+To begin this tutorial, load the package in your workspace:
+
+```julia
+julia> using Survey
+```
+
+Now load a survey dataset that you want to study. In this tutorial we will be using
+the [Academic Performance Index](https://r-survey.r-forge.r-project.org/survey/html/api.html)
+(API) datasets for Californian schools. The datasets contain information for all
+schools with at least 100 students and for various probability samples of the
+data.
+
+!!! note
+
+    The API program has been discontinued at the end of 2018. Information is archived
+    at [https://www.cde.ca.gov/re/pr/api.asp](https://www.cde.ca.gov/re/pr/api.asp)
+
+```julia
+julia> apisrs = load_data("apisrs")
+200×40 DataFrame
+ Row │ Column1  cds             stype    name             sname                          snum   dn ⋯
+     │ Int64    Int64           String1  String15         String                         Int64  St ⋯
+─────┼──────────────────────────────────────────────────────────────────────────────────────────────
+   1 │    1039  15739081534155  H        McFarland High   McFarland High                  1039  Mc ⋯
+   2 │    1124  19642126066716  E        Stowers (Cecil   Stowers (Cecil B.) Elementary   1124  AB
+   3 │    2868  30664493030640  H        Brea-Olinda Hig  Brea-Olinda High                2868  Br
+   4 │    1273  19644516012744  E        Alameda Element  Alameda Elementary              1273  Do
+   5 │    4926  40688096043293  E        Sunnyside Eleme  Sunnyside Elementary            4926  Sa ⋯
+   6 │    2463  19734456014278  E        Los Molinos Ele  Los Molinos Elementary          2463  Ha
+  ⋮  │    ⋮           ⋮            ⋮            ⋮                       ⋮                  ⋮       ⋱
+ 196 │     969  15635291534775  H        North High       North High                       969  Ke
+ 197 │    1752  19647336017446  E        Hammel Street E  Hammel Street Elementary        1752  Lo
+ 198 │    4480  37683386039143  E        Audubon Element  Audubon Elementary              4480  Sa ⋯
+ 199 │    4062  36678196036222  E        Edison Elementa  Edison Elementary               4062  On
+ 200 │    2683  24657716025621  E        Franklin Elemen  Franklin Elementary             2683  Me
+                                                                     34 columns and 189 rows omitted
+```
+
+`apisrs` is a simple random sample of the Academic Performance Index of Californian
+schools. The [`load_data`](@ref) function loads it as a
+[`DataFrame`](https://dataframes.juliadata.org/stable/lib/types/#DataFrames.DataFrame).
+You can look at the column names of `apisrs` to get an idea of what the dataset
+contains.
+
+```julia
+julia> names(apisrs)
+40-element Vector{String}:
+ "Column1"
+ "cds"
+ "stype"
+ "name"
+ "sname"
+ "snum"
+ "dname"
+ "dnum"
+ ⋮
+ "avg.ed"
+ "full"
+ "emer"
+ "enroll"
+ "api.stu"
+ "pw"
+ "fpc"
+```
+
+Next, build a survey design from your `DataFrame`:
+
+```julia
+julia> srs = SurveyDesign(apisrs; weights=:pw)
+SurveyDesign:
+data: 200×45 DataFrame
+strata: none
+cluster: none
+popsize: [6194.0, 6194.0, 6194.0  …  6194.0]
+sampsize: [200, 200, 200  …  200]
+weights: [30.97, 30.97, 30.97  …  30.97]
+allprobs: [0.0323, 0.0323, 0.0323  …  0.0323]
+```
+
+This is a simple random sample design with weights given by the column `:pw` of
+`apisrs`. You can also create more complex designs such as stratified or cluster
+sample designs. You can find more information on the complete capabilities of
+the package in the [Manual](@ref). The purpose of this tutorial is to show the
+basic usage of the package. For that, we will stick with a simple random sample.
+
+Now you can analyse your design according to your needs using the
+[functionality](@ref Index) provided by the package. For example, you can compute
+the estimated mean or population total for a given variable. Let's say we're
+interested in the mean Academic Performance Index from the year 1999. First we
+need to convert the [`SurveyDesign`](@ref) to a [`ReplicateDesign`](@ref) using
+bootstrapping:
+
+```julia
+julia> bsrs = bootweights(srs)
+ReplicateDesign:
+data: 200×4045 DataFrame
+strata: none
+cluster: none
+popsize: [6194.0, 6194.0, 6194.0  …  6194.0]
+sampsize: [200, 200, 200  …  200]
+weights: [30.97, 30.97, 30.97  …  30.97]
+allprobs: [0.0323, 0.0323, 0.0323  …  0.0323]
+replicates: 4000
+```
+
+We do this because [TODO: explain why]. Now we can compute the estimated mean:
+
+```julia
+julia> mean(:api99, bsrs)
+1×2 DataFrame
+ Row │ mean     SE
+     │ Float64  Float64
+─────┼──────────────────
+   1 │ 624.685   9.5747
+```
+
+We can also find the mean of both the 1999 API and 2000 API for a clear
+comparison between students' performance from one year to another:
+
+```julia
+2×3 DataFrame
+ Row │ names   mean     SE
+     │ String  Float64  Float64
+─────┼──────────────────────────
+   1 │ api99   624.685  9.5747
+   2 │ api00   656.585  9.30656
+```
+
+The [`ratio`](@ref) is also appropriate for studying the relationship between
+the two APIs:
+
+```julia
+julia> ratio(:api00, :api99, bsrs)
+1×2 DataFrame
+ Row │ ratio    SE
+     │ Float64  Float64
+─────┼─────────────────────
+   1 │ 1.05107  0.00364165
+```
+
+If we're interested in a certain statistic estimated by a specific domain, we
+can add the domain as the second parameter to our function. Let's say we want
+to find the estimated total number of students enrolled in schools from each
+county:
+
+```julia
+julia> total(:enroll, :cname, bsrs)
+38×3 DataFrame
+ Row │ cname            total           SE
+     │ String15         Float64         Any
+─────┼────────────────────────────────────────────
+   1 │ Kern                  1.95823e5  74984.5
+   2 │ Los Angeles      867129.0        1.34517e5
+   3 │ Orange                1.68786e5  63990.2
+   4 │ San Luis Obispo    6720.49       6731.29
+   5 │ San Francisco     30319.6        18024.1
+   6 │ Modoc              6503.7        6500.84
+  ⋮  │        ⋮               ⋮             ⋮
+  34 │ Yolo              12171.2        12131.8
+  35 │ Calaveras         12976.4        13095.7
+  36 │ Napa              39239.0        29841.1
+  37 │ Lake               6410.79       6562.72
+  38 │ Merced            15392.1        14921.9
+                                   27 rows omitted
+```
+
+Another way to visualize data is through graphs. We can make a histogram to
+better see the distribution of enrolled students:
+
+```@setup warning
+# !!!THIS NEEDS TO MATCH THE EXAMPLE IN THE DOCSTRING OF `hist`
+```
+
+```julia
+julia> hist(srs, :enroll)
+```
+
+![](assets/hist.png)
diff --git a/docs/src/index.md b/docs/src/index.md
index eddbcf0f..dd95d64d 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -4,28 +4,6 @@ CurrentModule = Survey
 
 # Survey
 
-This package is used to study complex survey data. It aims to be a fast alternative to the [Survey package in R](https://cran.r-project.org/web/packages/survey/index.html) developed by [Professor Thomas Lumley](https://www.stat.auckland.ac.nz/people/tlum005).
-
-This package currently supports simple random sample and stratified sample. In future releases, it will support multistage sampling as well. 
-
-## Basic demo
-
-The following demo uses the
-[Academic Performance Index](https://r-survey.r-forge.r-project.org/survey/html/api.html)
-(API) dataset for Californian schools. The data sets contain information for all schools
-with at least 100 students and for various probability samples of the data.
-
-The API program has been discontinued at the end of 2018. Information is archived at
-[https://www.cde.ca.gov/re/pr/api.asp](https://www.cde.ca.gov/re/pr/api.asp)
-
-Firstly, a survey design needs a dataset from which to gather information. The sample
-datasets provided with the package can be loaded as `DataFrame`s using [`load_data`](@ref):
-
-```julia
-julia> apisrs = load_data("apisrs");
-```
-
-`apisrs` is a simple random sample of the Academic Performance Index of Californian schools.
-
-Next, we can build a design.
-#TODO: continue tutorial
+This package is used to study complex survey data. It aims to be a fast alternative
+to the [Survey package in R](https://cran.r-project.org/web/packages/survey/index.html)
+developed by [Professor Thomas Lumley](https://www.stat.auckland.ac.nz/people/tlum005).
diff --git a/docs/src/manual.md b/docs/src/manual.md
new file mode 100644
index 00000000..39021136
--- /dev/null
+++ b/docs/src/manual.md
@@ -0,0 +1 @@
+# Manual

From cbb1a015e11f233b88aa7ac747f192606463c5bd Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Thu, 26 Jan 2023 20:16:38 +0200
Subject: [PATCH 55/80] Add manual structure

---
 docs/src/manual.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index 39021136..57063217 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -1 +1,9 @@
 # Manual
+
+## `DataFrames` dependence
+
+## Bootstrapping
+
+## Plotting
+
+## Performance

From ca186e498e536e148fc734a286b2e45c497b2895 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 11:45:29 +0200
Subject: [PATCH 56/80] Add underscore to `allprobs` column label for
 consistency

---
 src/SurveyDesign.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index fc8030f1..bce497e3 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -100,7 +100,7 @@ struct SurveyDesign <: AbstractSurveyDesign
             weights_labels = :_weights
             data[!, weights_labels] = repeat([1], nrow(data))
         end
-        allprobs_labels = :allprobs
+        allprobs_labels = :_allprobs
         data[!, allprobs_labels] = 1 ./ data[!, weights_labels] # In one-stage cluster sample, allprobs is just probs, no multiplication needed
         pps = false # for now no explicit pps supported faster functions, but they can be added
         new(data, cluster, popsize, sampsize_labels, strata, weights_labels, allprobs_labels, pps)

From 40a7fbfca3c79e62bf65e3009510ff486d0297ac Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 11:45:56 +0200
Subject: [PATCH 57/80] Add subsection on `DataFrames`

---
 docs/src/manual.md | 162 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 161 insertions(+), 1 deletion(-)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index 57063217..a83e3fa3 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -1,6 +1,166 @@
 # Manual
 
-## `DataFrames` dependence
+## `DataFrames` in `Survey`
+
+The internal structure of a survey design is built upon
+[`DataFrames`](https://dataframes.juliadata.org/stable/). In fact, the `data`
+argument is the only required argument for the constructor and it must be an
+[`AbstractDataFrame`](https://dataframes.juliadata.org/stable/lib/types/#DataFrames.AbstractDataFrame).
+
+### Data manipulation
+
+The provided `DataFrame` is altered by the [`SurveyDesign`](@ref) constructor
+in order to add columns for frequency and probability weights, sample and
+population sizes and, if necessary, strata and cluster information.
+
+Notice the change in `apisrs`:
+
+```julia
+julia> apisrs = load_data("apisrs")
+200×40 DataFrame
+ Row │ Column1  cds             stype    name             sname                ⋯
+     │ Int64    Int64           String1  String15         String               ⋯
+─────┼──────────────────────────────────────────────────────────────────────────
+   1 │    1039  15739081534155  H        McFarland High   McFarland High       ⋯
+   2 │    1124  19642126066716  E        Stowers (Cecil   Stowers (Cecil B.) E
+   3 │    2868  30664493030640  H        Brea-Olinda Hig  Brea-Olinda High
+   4 │    1273  19644516012744  E        Alameda Element  Alameda Elementary
+   5 │    4926  40688096043293  E        Sunnyside Eleme  Sunnyside Elementary ⋯
+   6 │    2463  19734456014278  E        Los Molinos Ele  Los Molinos Elementa
+  ⋮  │    ⋮           ⋮            ⋮            ⋮                       ⋮      ⋱
+ 196 │     969  15635291534775  H        North High       North High
+ 197 │    1752  19647336017446  E        Hammel Street E  Hammel Street Elemen
+ 198 │    4480  37683386039143  E        Audubon Element  Audubon Elementary   ⋯
+ 199 │    4062  36678196036222  E        Edison Elementa  Edison Elementary
+ 200 │    2683  24657716025621  E        Franklin Elemen  Franklin Elementary
+                                                 36 columns and 189 rows omitted
+
+julia> names(apisrs)
+40-element Vector{String}:
+ "Column1"
+ "cds"
+ "stype"
+ "name"
+ "sname"
+ "snum"
+ "dname"
+ "dnum"
+ ⋮
+ "avg.ed"
+ "full"
+ "emer"
+ "enroll"
+ "api.stu"
+ "pw"
+ "fpc"
+
+julia> srs = SurveyDesign(apisrs; weights=:pw);
+
+julia> apisrs
+200×45 DataFrame
+ Row │ Column1  cds             stype    name             sname                ⋯
+     │ Int64    Int64           String1  String15         String               ⋯
+─────┼──────────────────────────────────────────────────────────────────────────
+   1 │    1039  15739081534155  H        McFarland High   McFarland High       ⋯
+   2 │    1124  19642126066716  E        Stowers (Cecil   Stowers (Cecil B.) E
+   3 │    2868  30664493030640  H        Brea-Olinda Hig  Brea-Olinda High
+   4 │    1273  19644516012744  E        Alameda Element  Alameda Elementary
+   5 │    4926  40688096043293  E        Sunnyside Eleme  Sunnyside Elementary ⋯
+   6 │    2463  19734456014278  E        Los Molinos Ele  Los Molinos Elementa
+  ⋮  │    ⋮           ⋮            ⋮            ⋮                       ⋮      ⋱
+ 196 │     969  15635291534775  H        North High       North High
+ 197 │    1752  19647336017446  E        Hammel Street E  Hammel Street Elemen
+ 198 │    4480  37683386039143  E        Audubon Element  Audubon Elementary   ⋯
+ 199 │    4062  36678196036222  E        Edison Elementa  Edison Elementary
+ 200 │    2683  24657716025621  E        Franklin Elemen  Franklin Elementary
+                                                 41 columns and 189 rows omitted
+
+julia> names(apisrs)
+45-element Vector{String}:
+ "Column1"
+ "cds"
+ "stype"
+ "name"
+ "sname"
+ "snum"
+ "dname"
+ "dnum"
+ ⋮
+ "pw"
+ "fpc"
+ "false_strata"
+ "false_cluster"
+ "_sampsize"
+ "_popsize"
+ "_allprobs"
+```
+
+Five columns were added:
+
+- `false_strata` - only in the case of no stratification
+  
+  This column is necessary because when making a [`ReplicateDesign`](@ref), the
+  [`bootweights`](@ref) function uses [`groupby`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)
+  with a column representing the stratification variable. If there are no strata,
+  there is no such column so it should be added in order to keep `bootweights`
+  general.
+
+- `false_cluster` - only in the case of no clustering
+  
+  The reasoning is the same as in the case of no stratification.
+
+- `_sampsize` - sample sizes
+
+- `_popsize` - population sizes
+  
+  These match the stratification variable:
+
+  ```julia
+  julia> apistrat = load_data("apistrat");
+
+  julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
+
+  julia> apistrat[:, [:stype, :_sampsize, :_popsize]]
+  200×3 DataFrame
+   Row │ stype    _sampsize  _popsize
+       │ String1  Int64      Float64
+  ─────┼──────────────────────────────
+     1 │ E              100    4421.0
+     2 │ E              100    4421.0
+     3 │ E              100    4421.0
+     4 │ E              100    4421.0
+     5 │ E              100    4421.0
+     6 │ E              100    4421.0
+    ⋮  │    ⋮         ⋮         ⋮
+   196 │ E              100    4421.0
+   197 │ H               50     755.0
+   198 │ M               50    1018.0
+   199 │ E              100    4421.0
+   200 │ H               50     755.0
+                      189 rows omitted
+  ```
+
+- `_allprobs` - probability weights
+
+No column was added for frequency weights because the column passed through the
+`weights` argument is used by other functions, hence there is no need to add a
+new column. If `weights` is not specified, then a column called `_weights` is
+added.
+
+### Why `DataFrames`
+
+Survey data most of the time, if not always, is structured in a way that is very
+well suited for data frames. The [`DataFrames.jl`](https://dataframes.juliadata.org/stable/)
+package is mature and well maintained and provides a lot of functionality that
+proves useful for using inside functions such as [`bootweights`](@ref) or
+[`mean`](@ref). Mainly, the functions used are
+[`groupby`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)
+and [`combine`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.combine).
+
+Now that support for [metadata](https://dataframes.juliadata.org/stable/lib/metadata/)
+was introduced in `DataFrames.jl`, it becomes possible to use metadata in
+`Survey.jl` to reduce space complexity. For example, stratification and clustering
+information could be stored as metadata of the `DataFrame` passed through `data`.
 
 ## Bootstrapping
 

From 7ac37e14123ea990da570e7ba744ac36ac25e239 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 13:13:36 +0200
Subject: [PATCH 58/80] Change `julia` blocks to `@repl` blocks

---
 docs/src/getting_started.md | 135 ++++++++----------------------------
 docs/src/manual.md          | 117 ++++---------------------------
 2 files changed, 45 insertions(+), 207 deletions(-)

diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
index 54b97243..7fdd8ba6 100644
--- a/docs/src/getting_started.md
+++ b/docs/src/getting_started.md
@@ -9,14 +9,14 @@ is done using the following command:
 
 After registration, the regular `Pkg` commands can be used for installing the package:
 
-```julia
-julia> using Pkg
+```@repl
+using Pkg
 
-julia> Pkg.add("Survey")
+Pkg.add("Survey")
 ```
 
 ```julia
-julia> ]  add Survey
+]  add Survey
 ```
 
 ## Tutorial
@@ -25,8 +25,8 @@ This tutorial assumes basic knowledge of statistics and survey analysis.
 
 To begin this tutorial, load the package in your workspace:
 
-```julia
-julia> using Survey
+```@repl tutorial
+using Survey
 ```
 
 Now load a survey dataset that you want to study. In this tutorial we will be using
@@ -40,25 +40,8 @@ data.
     The API program has been discontinued at the end of 2018. Information is archived
     at [https://www.cde.ca.gov/re/pr/api.asp](https://www.cde.ca.gov/re/pr/api.asp)
 
-```julia
-julia> apisrs = load_data("apisrs")
-200×40 DataFrame
- Row │ Column1  cds             stype    name             sname                          snum   dn ⋯
-     │ Int64    Int64           String1  String15         String                         Int64  St ⋯
-─────┼──────────────────────────────────────────────────────────────────────────────────────────────
-   1 │    1039  15739081534155  H        McFarland High   McFarland High                  1039  Mc ⋯
-   2 │    1124  19642126066716  E        Stowers (Cecil   Stowers (Cecil B.) Elementary   1124  AB
-   3 │    2868  30664493030640  H        Brea-Olinda Hig  Brea-Olinda High                2868  Br
-   4 │    1273  19644516012744  E        Alameda Element  Alameda Elementary              1273  Do
-   5 │    4926  40688096043293  E        Sunnyside Eleme  Sunnyside Elementary            4926  Sa ⋯
-   6 │    2463  19734456014278  E        Los Molinos Ele  Los Molinos Elementary          2463  Ha
-  ⋮  │    ⋮           ⋮            ⋮            ⋮                       ⋮                  ⋮       ⋱
- 196 │     969  15635291534775  H        North High       North High                       969  Ke
- 197 │    1752  19647336017446  E        Hammel Street E  Hammel Street Elementary        1752  Lo
- 198 │    4480  37683386039143  E        Audubon Element  Audubon Elementary              4480  Sa ⋯
- 199 │    4062  36678196036222  E        Edison Elementa  Edison Elementary               4062  On
- 200 │    2683  24657716025621  E        Franklin Elemen  Franklin Elementary             2683  Me
-                                                                     34 columns and 189 rows omitted
+```@repl tutorial
+apisrs = load_data("apisrs")
 ```
 
 `apisrs` is a simple random sample of the Academic Performance Index of Californian
@@ -67,39 +50,14 @@ schools. The [`load_data`](@ref) function loads it as a
 You can look at the column names of `apisrs` to get an idea of what the dataset
 contains.
 
-```julia
-julia> names(apisrs)
-40-element Vector{String}:
- "Column1"
- "cds"
- "stype"
- "name"
- "sname"
- "snum"
- "dname"
- "dnum"
- ⋮
- "avg.ed"
- "full"
- "emer"
- "enroll"
- "api.stu"
- "pw"
- "fpc"
+```@repl tutorial
+names(apisrs)
 ```
 
 Next, build a survey design from your `DataFrame`:
 
-```julia
-julia> srs = SurveyDesign(apisrs; weights=:pw)
-SurveyDesign:
-data: 200×45 DataFrame
-strata: none
-cluster: none
-popsize: [6194.0, 6194.0, 6194.0  …  6194.0]
-sampsize: [200, 200, 200  …  200]
-weights: [30.97, 30.97, 30.97  …  30.97]
-allprobs: [0.0323, 0.0323, 0.0323  …  0.0323]
+```@repl tutorial
+srs = SurveyDesign(apisrs; weights=:pw)
 ```
 
 This is a simple random sample design with weights given by the column `:pw` of
@@ -115,52 +73,28 @@ interested in the mean Academic Performance Index from the year 1999. First we
 need to convert the [`SurveyDesign`](@ref) to a [`ReplicateDesign`](@ref) using
 bootstrapping:
 
-```julia
-julia> bsrs = bootweights(srs)
-ReplicateDesign:
-data: 200×4045 DataFrame
-strata: none
-cluster: none
-popsize: [6194.0, 6194.0, 6194.0  …  6194.0]
-sampsize: [200, 200, 200  …  200]
-weights: [30.97, 30.97, 30.97  …  30.97]
-allprobs: [0.0323, 0.0323, 0.0323  …  0.0323]
-replicates: 4000
+```@repl tutorial
+bsrs = bootweights(srs)
 ```
 
 We do this because [TODO: explain why]. Now we can compute the estimated mean:
 
-```julia
-julia> mean(:api99, bsrs)
-1×2 DataFrame
- Row │ mean     SE
-     │ Float64  Float64
-─────┼──────────────────
-   1 │ 624.685   9.5747
+```@repl tutorial
+mean(:api99, bsrs)
 ```
 
 We can also find the mean of both the 1999 API and 2000 API for a clear
 comparison between students' performance from one year to another:
 
-```julia
-2×3 DataFrame
- Row │ names   mean     SE
-     │ String  Float64  Float64
-─────┼──────────────────────────
-   1 │ api99   624.685  9.5747
-   2 │ api00   656.585  9.30656
+```@repl tutorial
+mean([:api99, :api00], bsrs)
 ```
 
 The [`ratio`](@ref) is also appropriate for studying the relationship between
 the two APIs:
 
-```julia
-julia> ratio(:api00, :api99, bsrs)
-1×2 DataFrame
- Row │ ratio    SE
-     │ Float64  Float64
-─────┼─────────────────────
-   1 │ 1.05107  0.00364165
+```@repl tutorial
+ratio(:api00, :api99, bsrs)
 ```
 
 If we're interested in a certain statistic estimated by a specific domain, we
@@ -168,25 +102,8 @@ can add the domain as the second parameter to our function. Let's say we want
 to find the estimated total number of students enrolled in schools from each
 county:
 
-```julia
-julia> total(:enroll, :cname, bsrs)
-38×3 DataFrame
- Row │ cname            total           SE
-     │ String15         Float64         Any
-─────┼────────────────────────────────────────────
-   1 │ Kern                  1.95823e5  74984.5
-   2 │ Los Angeles      867129.0        1.34517e5
-   3 │ Orange                1.68786e5  63990.2
-   4 │ San Luis Obispo    6720.49       6731.29
-   5 │ San Francisco     30319.6        18024.1
-   6 │ Modoc              6503.7        6500.84
-  ⋮  │        ⋮               ⋮             ⋮
-  34 │ Yolo              12171.2        12131.8
-  35 │ Calaveras         12976.4        13095.7
-  36 │ Napa              39239.0        29841.1
-  37 │ Lake               6410.79       6562.72
-  38 │ Merced            15392.1        14921.9
-                                   27 rows omitted
+```@repl tutorial
+total(:enroll, :cname, bsrs)
 ```
 
 Another way to visualize data is through graphs. We can make a histogram to
@@ -200,4 +117,12 @@ better see the distribution of enrolled students:
 julia> hist(srs, :enroll)
 ```
 
+The REPL doesn't show the plot. To see it, you need to save it locally.
+
+```julia
+julia> import AlgebraOfGraphics.save
+
+julia> save("hist.png", hist(srs, :enroll))
+```
+
 ![](assets/hist.png)
diff --git a/docs/src/manual.md b/docs/src/manual.md
index a83e3fa3..9b1006a1 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -15,84 +15,16 @@ population sizes and, if necessary, strata and cluster information.
 
 Notice the change in `apisrs`:
 
-```julia
-julia> apisrs = load_data("apisrs")
-200×40 DataFrame
- Row │ Column1  cds             stype    name             sname                ⋯
-     │ Int64    Int64           String1  String15         String               ⋯
-─────┼──────────────────────────────────────────────────────────────────────────
-   1 │    1039  15739081534155  H        McFarland High   McFarland High       ⋯
-   2 │    1124  19642126066716  E        Stowers (Cecil   Stowers (Cecil B.) E
-   3 │    2868  30664493030640  H        Brea-Olinda Hig  Brea-Olinda High
-   4 │    1273  19644516012744  E        Alameda Element  Alameda Elementary
-   5 │    4926  40688096043293  E        Sunnyside Eleme  Sunnyside Elementary ⋯
-   6 │    2463  19734456014278  E        Los Molinos Ele  Los Molinos Elementa
-  ⋮  │    ⋮           ⋮            ⋮            ⋮                       ⋮      ⋱
- 196 │     969  15635291534775  H        North High       North High
- 197 │    1752  19647336017446  E        Hammel Street E  Hammel Street Elemen
- 198 │    4480  37683386039143  E        Audubon Element  Audubon Elementary   ⋯
- 199 │    4062  36678196036222  E        Edison Elementa  Edison Elementary
- 200 │    2683  24657716025621  E        Franklin Elemen  Franklin Elementary
-                                                 36 columns and 189 rows omitted
-
-julia> names(apisrs)
-40-element Vector{String}:
- "Column1"
- "cds"
- "stype"
- "name"
- "sname"
- "snum"
- "dname"
- "dnum"
- ⋮
- "avg.ed"
- "full"
- "emer"
- "enroll"
- "api.stu"
- "pw"
- "fpc"
-
-julia> srs = SurveyDesign(apisrs; weights=:pw);
-
-julia> apisrs
-200×45 DataFrame
- Row │ Column1  cds             stype    name             sname                ⋯
-     │ Int64    Int64           String1  String15         String               ⋯
-─────┼──────────────────────────────────────────────────────────────────────────
-   1 │    1039  15739081534155  H        McFarland High   McFarland High       ⋯
-   2 │    1124  19642126066716  E        Stowers (Cecil   Stowers (Cecil B.) E
-   3 │    2868  30664493030640  H        Brea-Olinda Hig  Brea-Olinda High
-   4 │    1273  19644516012744  E        Alameda Element  Alameda Elementary
-   5 │    4926  40688096043293  E        Sunnyside Eleme  Sunnyside Elementary ⋯
-   6 │    2463  19734456014278  E        Los Molinos Ele  Los Molinos Elementa
-  ⋮  │    ⋮           ⋮            ⋮            ⋮                       ⋮      ⋱
- 196 │     969  15635291534775  H        North High       North High
- 197 │    1752  19647336017446  E        Hammel Street E  Hammel Street Elemen
- 198 │    4480  37683386039143  E        Audubon Element  Audubon Elementary   ⋯
- 199 │    4062  36678196036222  E        Edison Elementa  Edison Elementary
- 200 │    2683  24657716025621  E        Franklin Elemen  Franklin Elementary
-                                                 41 columns and 189 rows omitted
-
-julia> names(apisrs)
-45-element Vector{String}:
- "Column1"
- "cds"
- "stype"
- "name"
- "sname"
- "snum"
- "dname"
- "dnum"
- ⋮
- "pw"
- "fpc"
- "false_strata"
- "false_cluster"
- "_sampsize"
- "_popsize"
- "_allprobs"
+```@setup manual_DataFrames
+using Survey
+```
+
+```@repl manual_DataFrames
+apisrs = load_data("apisrs")
+names(apisrs)
+srs = SurveyDesign(apisrs; weights=:pw);
+apisrs
+names(apisrs)
 ```
 
 Five columns were added:
@@ -115,30 +47,11 @@ Five columns were added:
   
   These match the stratification variable:
 
-  ```julia
-  julia> apistrat = load_data("apistrat");
-
-  julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
-
-  julia> apistrat[:, [:stype, :_sampsize, :_popsize]]
-  200×3 DataFrame
-   Row │ stype    _sampsize  _popsize
-       │ String1  Int64      Float64
-  ─────┼──────────────────────────────
-     1 │ E              100    4421.0
-     2 │ E              100    4421.0
-     3 │ E              100    4421.0
-     4 │ E              100    4421.0
-     5 │ E              100    4421.0
-     6 │ E              100    4421.0
-    ⋮  │    ⋮         ⋮         ⋮
-   196 │ E              100    4421.0
-   197 │ H               50     755.0
-   198 │ M               50    1018.0
-   199 │ E              100    4421.0
-   200 │ H               50     755.0
-                      189 rows omitted
-  ```
+```@repl manual_DataFrames
+apistrat = load_data("apistrat");
+strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
+apistrat[:, [:stype, :_sampsize, :_popsize]]
+```
 
 - `_allprobs` - probability weights
 

From 1cb9f7883a48695243e2746c2379973772265bf3 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 13:24:12 +0200
Subject: [PATCH 59/80] Remove extra blank lines

---
 src/bootstrap.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index c57649a8..1b1972eb 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -4,10 +4,8 @@ julia> using Random
 
 julia> apiclus1 = load_data("apiclus1");
 
-
 julia> clus_one_stage = SurveyDesign(apiclus1; clusters = :dnum, popsize=:fpc);
 
-
 julia> bootweights(clus_one_stage; replicates=1000, rng=MersenneTwister(111)) # choose a seed for deterministic results
 ReplicateDesign:
 data: 183×1044 DataFrame

From d9f72f6ea32497d119aaf5600cee8b7b6f712b66 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 13:35:29 +0200
Subject: [PATCH 60/80] Combine docstrings into one

---
 src/quantile.jl | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/src/quantile.jl b/src/quantile.jl
index 81003e43..187e5e19 100644
--- a/src/quantile.jl
+++ b/src/quantile.jl
@@ -21,24 +21,6 @@ julia> quantile(:api00,srs,0.5)
      │ Float64           Float64 
 ─────┼───────────────────────────
    1 │            659.0  14.9764
-```
-"""
-function quantile(var::Symbol, design::ReplicateDesign, p::Real;kwargs...)
-    v = design.data[!, var]
-    probs = design.data[!, design.allprobs]
-    X = Statistics.quantile(v, ProbabilityWeights(probs), p)
-    Xt = [Statistics.quantile(v, ProbabilityWeights(design.data[! , "replicate_"*string(i)]), p) for i in 1:design.replicates]
-    variance = sum((Xt .- X).^2) / design.replicates
-    df = DataFrame(percentile = X, SE = sqrt(variance))
-    rename!(df, :percentile => string(p) * "th percentile")
-    return df
-end
-
-"""
-```jldoctest
-julia> apisrs = load_data("apisrs");
-
-julia> srs = SurveyDesign(apisrs; weights=:pw) |> bootweights; 
 
 julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
 5×3 DataFrame
@@ -52,6 +34,17 @@ julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
    5 │ 0.95           1473.1  142.568
 ```
 """
+function quantile(var::Symbol, design::ReplicateDesign, p::Real; kwargs...)
+    v = design.data[!, var]
+    probs = design.data[!, design.allprobs]
+    X = Statistics.quantile(v, ProbabilityWeights(probs), p)
+    Xt = [Statistics.quantile(v, ProbabilityWeights(design.data[! , "replicate_"*string(i)]), p) for i in 1:design.replicates]
+    variance = sum((Xt .- X).^2) / design.replicates
+    df = DataFrame(percentile = X, SE = sqrt(variance))
+    rename!(df, :percentile => string(p) * "th percentile")
+    return df
+end
+
 function quantile(var::Symbol, design::ReplicateDesign, probs::Vector{<:Real}; kwargs...)
     df = vcat([rename!(quantile(var, design, prob; kwargs...),[:statistic, :SE]) for prob in probs]...)
     df.percentile = string.(probs)

From 9d0c7ca61367fb9e5ae74913bce5e6d107e8c053 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 13:35:48 +0200
Subject: [PATCH 61/80] Add dot at the end of sentence

---
 src/ratio.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ratio.jl b/src/ratio.jl
index 4a7385ac..3b20b806 100644
--- a/src/ratio.jl
+++ b/src/ratio.jl
@@ -1,7 +1,7 @@
 """
     ratio(numerator, denominator, design)
 
-Estimate the ratio of the columns specified in numerator and denominator
+Estimate the ratio of the columns specified in numerator and denominator.
 
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");

From 74d06e5b02796121003631b72727b2403ee24850 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 13:36:32 +0200
Subject: [PATCH 62/80] Change docstring example to REPL-style

---
 src/boxplot.jl | 15 ++++++++++-----
 src/hist.jl    | 15 ++++++++++-----
 src/plot.jl    | 15 ++++++++++-----
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/boxplot.jl b/src/boxplot.jl
index 8ee3dcc4..b28d84b4 100644
--- a/src/boxplot.jl
+++ b/src/boxplot.jl
@@ -8,11 +8,16 @@ Weights can be specified by a `Symbol` using the keyword argument `weights`.
 The keyword arguments are all the arguments that can be passed to `mapping` in
 [AlgebraOfGraphics](https://docs.juliahub.com/AlgebraOfGraphics/CHIaw/0.4.7/).
 
-```@example boxplot
-apisrs = load_data("apisrs");
-srs = SurveyDesign(apisrs; weights=:pw);
-bp = boxplot(srs, :stype, :enroll; weights = :pw)
-save("boxplot.png", bp); nothing # hide
+```julia
+julia> using AlgebraOfGraphics
+
+julia> apisrs = load_data("apisrs");
+
+julia> srs = SurveyDesign(apisrs; weights=:pw);
+
+julia> bp = boxplot(srs, :stype, :enroll; weights = :pw);
+
+julia> save("boxplot.png", bp)
 ```
 
 ![](assets/boxplot.png)
diff --git a/src/hist.jl b/src/hist.jl
index 40935a1e..369620c4 100644
--- a/src/hist.jl
+++ b/src/hist.jl
@@ -59,11 +59,16 @@ For the complete argument list see [Makie.hist](https://makie.juliaplots.org/sta
 
     The `weights` argument should be a `Symbol` specifying a design variable.
 
-```@example histogram
-apisrs = load_data("apisrs");
-srs = SurveyDesign(apisrs; weights=:pw);
-h = hist(srs, :enroll)
-save("hist.png", h); nothing # hide
+```julia
+julia> using AlgebraOfGraphics
+
+julia> apisrs = load_data("apisrs");
+
+julia> srs = SurveyDesign(apisrs; weights=:pw);
+
+julia> h = hist(srs, :enroll);
+
+julia> save("hist.png", h)
 ```
 
 ![](assets/hist.png)
diff --git a/src/plot.jl b/src/plot.jl
index 79f1b97d..2e1e2338 100644
--- a/src/plot.jl
+++ b/src/plot.jl
@@ -6,11 +6,16 @@ Scatter plot of survey design variables `x` and `y`.
 The plot takes into account the frequency weights specified by the user
 in the design.
 
-```@example plot
-apisrs = load_data("apisrs");
-srs = SurveyDesign(apisrs; weights=:pw);
-s = plot(srs, :api99, :api00)
-save("scatter.png", s); nothing # hide
+```julia
+julia> using AlgebraOfGraphics
+
+julia> apisrs = load_data("apisrs");
+
+julia> srs = SurveyDesign(apisrs; weights=:pw);
+
+julia> s = plot(srs, :api99, :api00);
+
+julia> save("scatter.png", s)
 ```
 
 ![](assets/scatter.png)

From 1582090e797ee39948fefdfa4ab3efbef89fc9af Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 13:48:09 +0200
Subject: [PATCH 63/80] Restructure API to include multiple methods nicer

---
 docs/src/api.md | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 0890ab65..341dcbba 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -14,17 +14,11 @@ SurveyDesign
 ReplicateDesign
 load_data
 bootweights
-mean(x::Symbol, design::ReplicateDesign)
-mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
-total(x::Symbol, design::ReplicateDesign)
-total(x::Symbol, domain::Symbol, design::ReplicateDesign)
+mean
+total
 quantile
 ratio
-plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
-boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
-hist(design::AbstractSurveyDesign, var::Symbol,
-                 bins::Union{Integer, AbstractVector} = freedman_diaconis(design, var);
-                 normalization = :density,
-                 kwargs...
-                )
+plot
+boxplot
+hist
 ```

From 0252a4bc3690d5bb5a33bdeadfb42579d33bd9c6 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 16:52:39 +0200
Subject: [PATCH 64/80] Add Plotting section

---
 docs/src/manual.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index 9b1006a1..e87ef2c6 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -79,4 +79,23 @@ information could be stored as metadata of the `DataFrame` passed through `data`
 
 ## Plotting
 
+`Survey` uses [`AlgebraOfGraphics`](https://aog.makie.org/stable/) for plotting.
+All plotting functions support a variable number of keyword arguments (through
+`kwargs...`) that are passed internally to corresponding `AlgebraOfGraphics`
+functions. See the source code for details:
+[`plot`](https://github.com/xKDR/Survey.jl/blob/main/src/plot.jl),
+[`hist`](https://github.com/xKDR/Survey.jl/blob/main/src/hist.jl),
+[`boxplot`](https://github.com/xKDR/Survey.jl/blob/main/src/boxplot.jl).
+This means that all functionality provided by `AlgebraOfGraphics` is supported
+in `Survey`.
+
+Specific functionality might need to be imported from `AlgebraOfGraphics`.
+Moreover, in order to choose the preferred
+[`Makie backend`](https://docs.makie.org/stable/#makie_ecosystem) you must
+explicitly use it:
+
+```@repl
+using AlgebraOfGraphics, CairoMakie
+```
+
 ## Performance

From c2346c84715ee985971b3ba71b52b04a5be24c11 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 17:40:34 +0200
Subject: [PATCH 65/80] Style check

---
 src/quantile.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/quantile.jl b/src/quantile.jl
index 187e5e19..2fd8b797 100644
--- a/src/quantile.jl
+++ b/src/quantile.jl
@@ -15,14 +15,14 @@ julia> apisrs = load_data("apisrs");
 
 julia> srs = SurveyDesign(apisrs; weights=:pw) |> bootweights; 
 
-julia> quantile(:api00,srs,0.5)
+julia> quantile(:api00, srs, 0.5)
 1×2 DataFrame
  Row │ 0.5th percentile  SE      
      │ Float64           Float64 
 ─────┼───────────────────────────
    1 │            659.0  14.9764
 
-julia> quantile(:enroll,srs,[0.1,0.2,0.5,0.75,0.95])
+julia> quantile(:enroll, srs, [0.1,0.2,0.5,0.75,0.95])
 5×3 DataFrame
  Row │ percentile  statistic  SE       
      │ String      Float64    Float64  

From 97389a2f78f2904269451e063668fd472449dc36 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 18:53:13 +0200
Subject: [PATCH 66/80] Add comparisons

---
 docs/make.jl       |  1 -
 docs/src/manual.md | 98 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 2f6535ca..aeaab208 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -19,7 +19,6 @@ makedocs(;
         "Home" => "index.md",
         "Getting Started" => "getting_started.md",
         "Manual" => "manual.md",
-        "Moving from R" => "R_comparison.md",
         "API reference" => "api.md"
     ],
     checkdocs=:exports,
diff --git a/docs/src/manual.md b/docs/src/manual.md
index e87ef2c6..6d726064 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -98,4 +98,100 @@ explicitly use it:
 using AlgebraOfGraphics, CairoMakie
 ```
 
-## Performance
+## Comparison with other languages
+
+There are multiple languages that offer survey analysis tools, most notably
+[SAS/STAT](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html)
+and [R](https://CRAN.R-project.org/package=survey).
+
+### R comparison
+
+The inspiration for `Survey.jl` comes from R. Hence the syntax is in most cases
+very similar to the syntax in the [`survey` package](https://cran.r-project.org/web/packages/survey/survey.pdf)
+from R. To showcase this we will use the `apisrs` dataset found in both R's
+`survey` and `Survey.jl`. See the [Tutorial](@ref) section for more details about
+the `api` datesets.
+
+All examples show the R code first, followed by the Julia code.
+
+#### Loading data
+
+```R
+data(api)
+# all `api` datasets are loaded globally
+```
+
+```julia
+srs = load_data("apisrs")
+# only one dataset is loaded and stored in a variable
+```
+
+#### Creating a design
+
+```R
+srs = svydesign(id=~1, data=apisrs, weights=~pw) # simple random sample
+strat = svydesign(id=~1, data=apistrat, strata=~stype, weights=~pw) # stratified
+clus1 = svydesign(id=~dnum, data=apiclus1, weights=~pw) # clustered (one stage)
+```
+
+```julia
+srs = SurveyDesign(apisrs; weights=:pw) # simple random sample
+strat = SurveyDesign(apistrat; strata=:stype, weights=:pw) # stratified
+clus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # clustered (one stage)
+```
+
+#### Creating a replicate design
+
+```R
+bsrs = as.svrepdesign(srs, type="bootstrap")
+```
+
+```julia
+bsrs = bootweights(srs)
+```
+
+#### Computing the estimated mean
+
+```R
+svymean(~api00, bsrs)
+svymean(~api99+~api00, bsrs)
+```
+
+```julia
+mean(:api00, bsrs)
+mean([:api99, :api00], bsrs)
+```
+
+#### Computing the estimated total
+
+```R
+svytotal(~api00, bsrs)
+svytotal(~api99+~api00, bsrs)
+```
+
+```julia
+total(:api00, bsrs)
+total([:api99, :api00], bsrs)
+```
+
+#### Computing quantiles
+
+```R
+svyquantile(~api00, bsrs, 0.5)
+svyquantile(~api00, bsrs, c(0.25, 0.5, 0.75))
+```
+
+```julia
+quantile(:api00, bsrs, 0.5)
+quantile(:api00, bsrs, [0.25, 0.5, 0.75])
+```
+
+#### Domain estimation
+
+```R
+svyby(~api00, ~cname, bsrs, svymean)
+```
+
+```julia
+mean(:api00, :cname, bsrs)
+```

From 185198d5bd2777c0bbc1326e5c6631990825b238 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 19:00:24 +0200
Subject: [PATCH 67/80] Remove R_comparison.md file

---
 docs/src/R_comparison.md | 131 ---------------------------------------
 1 file changed, 131 deletions(-)
 delete mode 100644 docs/src/R_comparison.md

diff --git a/docs/src/R_comparison.md b/docs/src/R_comparison.md
deleted file mode 100644
index 1d1a4b2d..00000000
--- a/docs/src/R_comparison.md
+++ /dev/null
@@ -1,131 +0,0 @@
-# Moving from R to Julia
-This section presents examples to help move from R to Julia. Examples show R and Julia code for common operations in survey analysis. <br>
-For the same operation, first the R and then the Julia code is presented. 
-
-## Simple random sample
-
-The `apisrs` data, which is provided in both `survey` and `Survey.jl`, is used as an example. It's a simple random sample of the Academic Performance Index of Californian schools.
-
-### 1. Creating a survey design
-Instantiating a simple random sample survey design.
-
-```R
-library(survey)
-data(api)
-dsrs = svydesign(id = ~1, data = apisrs, weights = ~pw, fpc = ~fpc)
-```
-
-```julia
-using Survey
-srs = load_data("apisrs")
-dsrs = SimpleRandomSample(srs; popsize = :fpc)
-```
-
-### 2. Mean
-In the following example the mean of the variable `api00` is calculated. 
-
-```R
-svymean(~api00, dsrs)
-```
-```julia
-mean(:api00, dsrs)
-```
-
-### 3. Total
-In the following example the sum of the variable `api00` is calculated. 
-
-```R
-svytotal(~api00, dsrs)
-```
-```julia
-total(:api00, dsrs)
-```
-
-### 4. Quantile
-In the following example the median of the variable `api00` is calculated.
-```R
-svyquantile(~api00, dsrs, 0.5)
-```
-```julia
-quantile(:api00, dsrs, 0.5)
-```
-
-### 5. Domain estimation
-In the following example the mean of the variable `api00` is calculated grouped by the variable `cname`. 
-
-```R
-svyby(~api00, ~cname, dsrs, svymean)
-```
-
-```julia
-mean(:api00, :cname, dsrs)
-```
-
-In the following example the total of the variable `api00` is calculated grouped by the variable `cname`. 
-
-```R
-svyby(~api00, ~cname, dsrs, svytotal)
-```
-
-```julia
-total(:api00, :cname, dsrs)
-```
-
-## Stratified sample
-
-The `apistrat` data, which is provided in both `survey` and `Survey`, is used as an example. It's a stratified sample of the Academic Performance Index of Californian schools.
-
-### 1. Creating a design object
-The following example shows how to construct a design object for a stratified sample. 
-
-```R
-library(survey)
-data(api)
-dstrat = svydesign(id = ~1, data = apistrat, strata = ~stype, weights = ~pw, fpc = ~fpc)
-```
-
-```julia
-using Survey
-strat = load_data("apistrat")
-dstrat = StratifiedSample(strat, :stype; popsize  = :fpc)
-```
-
-### 2. Mean
-In the following example the mean of the variable `api00` is calculated. 
-
-```R
-svymean(~api00, dstrat)
-```
-```julia
-mean(:api00, dstrat)
-```
-
-### 3. Total
-In the following example the sum of the variable `api00` is calculated. 
-
-```R
-svytotal(~api00, dstrat)
-```
-```julia
-total(:api00, dstrat)
-```
-
-### 4. Quantile
-In the following example the median of the variable `api00` is calculated.
-```R
-svyquantile(~api00, dstrat, 0.5)
-```
-```julia
-quantile(:api00, dstrat, 0.5)
-```
-
-### 5. Domain estimation
-In the following example the mean of the variable `api00` is calculated grouped by the variable `cname`. 
-
-```R
-svyby(~api00, ~cname, dstrat, svymean)
-```
-
-```julia
-mean(:api00, :cname, dstrat)
-```
\ No newline at end of file

From 36d308b8a08c5c09de856f7333923089ae453621 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 19:01:46 +0200
Subject: [PATCH 68/80] Add Future plans section

---
 docs/src/manual.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index 6d726064..7310e072 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -195,3 +195,5 @@ svyby(~api00, ~cname, bsrs, svymean)
 ```julia
 mean(:api00, :cname, bsrs)
 ```
+
+## Future plans

From fb32c8f0f7598d739fe4b6fb151180561eb6d8cc Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 19:03:29 +0200
Subject: [PATCH 69/80] Change section name to ReplicateDesign

---
 docs/src/manual.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index 7310e072..3197bb6a 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -75,7 +75,7 @@ was introduced in `DataFrames.jl`, it becomes possible to use metadata in
 `Survey.jl` to reduce space complexity. For example, stratification and clustering
 information could be stored as metadata of the `DataFrame` passed through `data`.
 
-## Bootstrapping
+## ReplicateDesign
 
 ## Plotting
 

From 1ed029701625208614ee7228f5431207ff0423a1 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 19:03:54 +0200
Subject: [PATCH 70/80] Remove backticks

---
 docs/src/manual.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index 3197bb6a..1adcc8c2 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -1,6 +1,6 @@
 # Manual
 
-## `DataFrames` in `Survey`
+## DataFrames in Survey
 
 The internal structure of a survey design is build upon
 [`DataFrames`](https://dataframes.juliadata.org/stable/). In fact, the `data`
@@ -60,7 +60,7 @@ No column was added for frequency weights because the column passed through the
 new column. If `weights` is not specified, then a column called `_weights` is
 added.
 
-### Why `DataFrames`
+### Why DataFrames
 
 Survey data most of the time, if not always, is structured in a way that is very
 well suited for data frames. The [`DataFrames.jl`](https://dataframes.juliadata.org/stable/)

From 907625f4531d7ee0085e5c2d487616f95bb2e28e Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 19:24:09 +0200
Subject: [PATCH 71/80] Restructure files

---
 docs/make.jl                |  10 +-
 docs/src/getting_started.md |   2 +-
 docs/src/man/comparisons.md |  97 ++++++++++++++++++
 docs/src/man/dataframes.md  |  74 ++++++++++++++
 docs/src/man/future.md      |   1 +
 docs/src/man/plotting.md    |  20 ++++
 docs/src/man/replicate.md   |   1 +
 docs/src/manual.md          | 199 ------------------------------------
 8 files changed, 202 insertions(+), 202 deletions(-)
 create mode 100644 docs/src/man/comparisons.md
 create mode 100644 docs/src/man/dataframes.md
 create mode 100644 docs/src/man/future.md
 create mode 100644 docs/src/man/plotting.md
 create mode 100644 docs/src/man/replicate.md
 delete mode 100644 docs/src/manual.md

diff --git a/docs/make.jl b/docs/make.jl
index aeaab208..8ef8a61b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -18,8 +18,14 @@ makedocs(;
     pages=[
         "Home" => "index.md",
         "Getting Started" => "getting_started.md",
-        "Manual" => "manual.md",
-        "API reference" => "api.md"
+        "Manual" => [
+            "DataFrames in Survey" => "man/dataframes.md",
+            "ReplicateDesign" => "man/replicate.md",
+            "Plotting" => "man/plotting.md",
+            "Comparison with other languages" => "man/comparisons.md",
+            "Future plans" => "man/future.md",
+        ],
+        "API reference" => "api.md",
     ],
     checkdocs=:exports,
 )
diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
index 7fdd8ba6..de7a09bc 100644
--- a/docs/src/getting_started.md
+++ b/docs/src/getting_started.md
@@ -63,7 +63,7 @@ srs = SurveyDesign(apisrs; weights=:pw)
 This is a simple random sample design with weights given by the column `:pw` of
 `apisrs`. You can also create more complex designs such as stratified or cluster
 sample designs. You can find more information on the complete capabilities of
-the package in the [Manual](@ref). The purpose of this tutorial is to show the
+the package in the [Manual](@ref manual). The purpose of this tutorial is to show the
 basic usage of the package. For that, we will stick with a simple random sample.
 
 Now you can analyse your design according to your needs using the
diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
new file mode 100644
index 00000000..232dd905
--- /dev/null
+++ b/docs/src/man/comparisons.md
@@ -0,0 +1,97 @@
+# Comparison with other languages
+
+There are multiple languages that offer survey analysis tools, most notably
+[SAS/STAT](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html)
+and [R](https://CRAN.R-project.org/package=survey).
+
+## R comparison
+
+The inspiration for `Survey.jl` comes from R. Hence the syntax is in most cases
+very similar to the syntax in the [`survey` package](https://cran.r-project.org/web/packages/survey/survey.pdf)
+from R. To showcase this we will use the `apisrs` dataset found in both R's
+`survey` and `Survey.jl`. See the [Tutorial](@ref) section for more details about
+the `api` datasets.
+
+All examples show the R code first, followed by the Julia code.
+
+#### Loading data
+
+```R
+data(api)
+# all `api` datasets are loaded globally
+```
+
+```julia
+srs = load_data("apisrs")
+# only one dataset is loaded and stored in a variable
+```
+
+#### Creating a design
+
+```R
+srs = svydesign(id=~1, data=apisrs, weights=~pw) # simple random sample
+strat = svydesign(id=~1, data=apistrat, strata=~stype, weights=~pw) # stratified
+clus1 = svydesign(id=~dnum, data=apiclus1, weights=~pw) # clustered (one stage)
+```
+
+```julia
+srs = SurveyDesign(apisrs; weights=:pw) # simple random sample
+strat = SurveyDesign(apistrat; strata=:stype, weights=:pw) # stratified
+clus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # clustered (one stage)
+```
+
+#### Creating a replicate design
+
+```R
+bsrs = as.svrepdesign(srs, type="bootstrap")
+```
+
+```julia
+bsrs = bootweights(srs)
+```
+
+#### Computing the estimated mean
+
+```R
+svymean(~api00, bsrs)
+svymean(~api99+~api00, bsrs)
+```
+
+```julia
+mean(:api00, bsrs)
+mean([:api99, :api00], bsrs)
+```
+
+#### Computing the estimated total
+
+```R
+svytotal(~api00, bsrs)
+svytotal(~api99+~api00, bsrs)
+```
+
+```julia
+total(:api00, bsrs)
+total([:api99, :api00], bsrs)
+```
+
+#### Computing quantiles
+
+```R
+svyquantile(~api00, bsrs, 0.5)
+svyquantile(~api00, bsrs, c(0.25, 0.5, 0.75))
+```
+
+```julia
+quantile(:api00, bsrs, 0.5)
+quantile(:api00, bsrs, [0.25, 0.5, 0.75])
+```
+
+#### Domain estimation
+
+```R
+svyby(~api00, ~cname, bsrs, svymean)
+```
+
+```julia
+mean(:api00, :cname, bsrs)
+```
diff --git a/docs/src/man/dataframes.md b/docs/src/man/dataframes.md
new file mode 100644
index 00000000..df310019
--- /dev/null
+++ b/docs/src/man/dataframes.md
@@ -0,0 +1,74 @@
+# [DataFrames in Survey](@id manual)
+
+The internal structure of a survey design is built upon
+[`DataFrames`](https://dataframes.juliadata.org/stable/). In fact, the `data`
+argument is the only required argument for the constructor and it must be an
+[`AbstractDataFrame`](https://dataframes.juliadata.org/stable/lib/types/#DataFrames.AbstractDataFrame).
+
+## Data manipulation
+
+The provided `DataFrame` is altered by the [`SurveyDesign`](@ref) constructor
+in order to add columns for frequency and probability weights, sample and
+population sizes and, if necessary, strata and cluster information.
+
+Notice the change in `apisrs`:
+
+```@setup manual_DataFrames
+using Survey
+```
+
+```@repl manual_DataFrames
+apisrs = load_data("apisrs")
+names(apisrs)
+srs = SurveyDesign(apisrs; weights=:pw);
+apisrs
+names(apisrs)
+```
+
+Five columns were added:
+
+- `false_strata` - only in the case of no stratification
+  
+  This column is necessary because when making a [`ReplicateDesign`](@ref), the
+  [`bootweights`](@ref) function uses [`groupby`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)
+  with a column representing the stratification variable. If there are no strata,
+  there is no such column so it should be added in order to keep `bootweights`
+  general.
+
+- `false_cluster` - only in the case of no clustering
+  
+  The reasoning is the same as in the case of no stratification.
+
+- `_sampsize` - sample sizes
+
+- `_popsize` - population sizes
+  
+  These match the stratification variable:
+
+```@repl manual_DataFrames
+apistrat = load_data("apistrat");
+strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
+apistrat[:, [:stype, :_sampsize, :_popsize]]
+```
+
+- `_allprobs` - probability weights
+
+No column was added for frequency weights because the column passed through the
+`weights` argument is used by other functions, hence there is no need to add a
+new column. If `weights` is not specified, then a column called `_weights` is
+added.
+
+## Why DataFrames
+
+Survey data most of the time, if not always, is structured in a way that is very
+well suited for data frames. The [`DataFrames.jl`](https://dataframes.juliadata.org/stable/)
+package is mature and well maintained and provides a lot of functionality that
+proves useful inside functions such as [`bootweights`](@ref) or
+[`mean`](@ref). Mainly, the functions used are
+[`groupby`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)
+and [`combine`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.combine).
+
+Now that support for [metadata](https://dataframes.juliadata.org/stable/lib/metadata/)
+was introduced in `DataFrames.jl`, it becomes possible to use metadata in
+`Survey.jl` to reduce space complexity. For example, stratification and clustering
+information could be stored as metadata of the `DataFrame` passed through `data`.
diff --git a/docs/src/man/future.md b/docs/src/man/future.md
new file mode 100644
index 00000000..99e3ce5a
--- /dev/null
+++ b/docs/src/man/future.md
@@ -0,0 +1 @@
+# Future plans
diff --git a/docs/src/man/plotting.md b/docs/src/man/plotting.md
new file mode 100644
index 00000000..2fecc07f
--- /dev/null
+++ b/docs/src/man/plotting.md
@@ -0,0 +1,20 @@
+# Plotting
+
+`Survey` uses [`AlgebraOfGraphics`](https://aog.makie.org/stable/) for plotting.
+All plotting functions support a variable number of keyword arguments (through
+`kwargs...`) that are passed internally to corresponding `AlgebraOfGraphics`
+functions. See the source code for details:
+[`plot`](https://github.com/xKDR/Survey.jl/blob/main/src/plot.jl),
+[`hist`](https://github.com/xKDR/Survey.jl/blob/main/src/hist.jl),
+[`boxplot`](https://github.com/xKDR/Survey.jl/blob/main/src/boxplot.jl).
+This means that all functionality provided by `AlgebraOfGraphics` is supported
+in `Survey`.
+
+Specific functionality might need to be imported from `AlgebraOfGraphics`.
+Moreover, in order to choose the preferred
+[`Makie backend`](https://docs.makie.org/stable/#makie_ecosystem) you must
+explicitly use it:
+
+```@repl
+using AlgebraOfGraphics, CairoMakie
+```
diff --git a/docs/src/man/replicate.md b/docs/src/man/replicate.md
new file mode 100644
index 00000000..dbfd5089
--- /dev/null
+++ b/docs/src/man/replicate.md
@@ -0,0 +1 @@
+# ReplicateDesign
diff --git a/docs/src/manual.md b/docs/src/manual.md
deleted file mode 100644
index 1adcc8c2..00000000
--- a/docs/src/manual.md
+++ /dev/null
@@ -1,199 +0,0 @@
-# Manual
-
-## DataFrames in Survey
-
-The internal structure of a survey design is build upon
-[`DataFrames`](https://dataframes.juliadata.org/stable/). In fact, the `data`
-argument is the only required argument for the constructor and it must be an
-[`AbstractDataFrame`](https://dataframes.juliadata.org/stable/lib/types/#DataFrames.AbstractDataFrame).
-
-### Data manipulation
-
-The provided `DataFrame` is altered by the [`SurveyDesign`](@ref) constructor
-in order to add columns for frequency and probability weights, sample and
-population sizes and, if necessary, strata and cluster information.
-
-Notice the change in `apisrs`:
-
-```@setup manual_DataFrames
-using Survey
-```
-
-```@repl manual_DataFrames
-apisrs = load_data("apisrs")
-names(apisrs)
-srs = SurveyDesign(apisrs; weights=:pw);
-apisrs
-names(apisrs)
-```
-
-Five columns were added:
-
-- `false_strata` - only in the case of no stratification
-  
-  This column is necessary because when making a [`ReplicateDesign`](@ref), the
-  [`bootweights`](@ref) function uses [`groupby`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)
-  with a column representing the stratification variable. If there are no strata,
-  there is no such column so it should be added in order to keep `bootweights`
-  general.
-
-- `false_cluster` - only in the case of no clustering
-  
-  The reasoning is the same as in the case of no stratification.
-
-- `_sampsize` - sample sizes
-
-- `_popsize` - population sizes
-  
-  These match the stratification variable:
-
-```@repl manual_DataFrames
-apistrat = load_data("apistrat");
-strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
-apistrat[:, [:stype, :_sampsize, :_popsize]]
-```
-
-- `_allprobs` - probability weights
-
-No column was added for frequency weights because the column passed through the
-`weights` argument is used by other functions, hence there is no need to add a
-new column. If `weights` is not specified, then a column called `_weights` is
-added.
-
-### Why DataFrames
-
-Survey data most of the time, if not always, is structured in a way that is very
-well suited for data frames. The [`DataFrames.jl`](https://dataframes.juliadata.org/stable/)
-package is mature and well maintained and provides a lot of functionality that
-proves useful for using inside functions such as [`bootweights`](@ref) or
-[`mean`](@ref). Mainly, the functions used are
-[`groupby`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.groupby)
-and [`combine`](https://dataframes.juliadata.org/stable/lib/functions/#DataFrames.combine).
-
-Now that support for [metadata](https://dataframes.juliadata.org/stable/lib/metadata/)
-was introduced in `DataFrames.jl`, it becomes possible to use metadata in
-`Survey.jl` to reduce space complexity. For example, stratification and clustering
-information could be stored as metadata of the `DataFrame` passed through `data`.
-
-## ReplicateDesign
-
-## Plotting
-
-`Survey` uses [`AlgebraOfGraphics`](https://aog.makie.org/stable/) for plotting.
-All plotting functions support a variable number of keyword arguments (through
-`kwargs...`) that are passed internally to corresponding `AlgebraOfGraphics`
-functions. See the source code for details:
-[`plot`](https://github.com/xKDR/Survey.jl/blob/main/src/plot.jl),
-[`hist`](https://github.com/xKDR/Survey.jl/blob/main/src/hist.jl),
-[`boxplot`](https://github.com/xKDR/Survey.jl/blob/main/src/boxplot.jl).
-This means that all functionality provided by `AlgebraOfGraphics` is supported
-in `Survey`.
-
-Specific functionality might need to be imported from `AlgebraOfGraphics`.
-Moreover, in order to choose the preferred
-[`Makie backend`](https://docs.makie.org/stable/#makie_ecosystem) you must
-explicitly use it:
-
-```@repl
-using AlgebraOfGraphics, CairoMakie
-```
-
-## Comparison with other languages
-
-There are multiple languages that offer survey analysis tools, most notably
-[SAS/STAT](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html)
-and [R](https://CRAN.R-project.org/package=survey).
-
-### R comparison
-
-The inspiration for `Survey.jl` comes from R. Hence the syntax is in most cases
-very similar to the syntax in the [`survey` package](https://cran.r-project.org/web/packages/survey/survey.pdf)
-from R. To showcase this we will use the `apisrs` dataset found in both R's
-`survey` and `Survey.jl`. See the [Tutorial](@ref) section for more details about
-the `api` datesets.
-
-All examples show the R code first, followed by the Julia code.
-
-#### Loading data
-
-```R
-data(api)
-# all `api` datasets are loaded globally
-```
-
-```julia
-srs = load_data("apisrs")
-# only one dataset is loaded and stored in a variable
-```
-
-#### Creating a design
-
-```R
-srs = svydesign(id=~1, data=apisrs, weights=~pw) # simple random sample
-strat = svydesign(id=~1, data=apistrat, strata=~stype, weights=~pw) # stratified
-clus1 = svydesign(id=~dnum, data=apiclus1, weights=~pw) # clustered (one stage)
-```
-
-```julia
-srs = SurveyDesign(apisrs; weights=:pw) # simple random sample
-strat = SurveyDesign(apistrat; strata=:stype, weights=:pw) # stratified
-clus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # clustered (one stage)
-```
-
-#### Creating a replicate design
-
-```R
-bsrs = as.svrepdesign(srs, type="bootstrap")
-```
-
-```julia
-bsrs = bootweights(srs)
-```
-
-#### Computing the estimated mean
-
-```R
-svymean(~api00, bsrs)
-svymean(~api99+~api00, bsrs)
-```
-
-```julia
-mean(:api00, bsrs)
-mean([:api99, :api00], bsrs)
-```
-
-#### Computing the estimated total
-
-```R
-svytotal(~api00, bsrs)
-svytotal(~api99+~api00, bsrs)
-```
-
-```julia
-total(:api00, bsrs)
-total([:api99, :api00], bsrs)
-```
-
-#### Computing quantiles
-
-```R
-svyquantile(~api00, bsrs, 0.5)
-svyquantile(~api00, bsrs, c(0.25, 0.5, 0.75))
-```
-
-```julia
-quantile(:api00, bsrs, 0.5)
-quantile(:api00, bsrs, [0.25, 0.5, 0.75])
-```
-
-#### Domain estimation
-
-```R
-svyby(~api00, ~cname, bsrs, svymean)
-```
-
-```julia
-mean(:api00, :cname, bsrs)
-```
-
-## Future plans

From e28ab3ee2e1c55d015569b4c7ccb588fe0706836 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 20:09:21 +0200
Subject: [PATCH 72/80] Remove explicit url

---
 docs/src/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
index de7a09bc..629f10a0 100644
--- a/docs/src/getting_started.md
+++ b/docs/src/getting_started.md
@@ -38,7 +38,7 @@ data.
 !!! note
 
     The API program has been discontinued at the end of 2018. Information is archived
-    at [https://www.cde.ca.gov/re/pr/api.asp](https://www.cde.ca.gov/re/pr/api.asp)
+    [here](https://www.cde.ca.gov/re/pr/api.asp).
 
 ```@repl tutorial
 apisrs = load_data("apisrs")

From da66283f1a26731266e759913706788926c8fc16 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Fri, 27 Jan 2023 20:11:44 +0200
Subject: [PATCH 73/80] Rename Comparisons section

---
 docs/make.jl                | 2 +-
 docs/src/man/comparisons.md | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 8ef8a61b..9a67b5b0 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -22,7 +22,7 @@ makedocs(;
             "DataFrames in Survey" => "man/dataframes.md",
             "ReplicateDesign" => "man/replicate.md",
             "Plotting" => "man/plotting.md",
-            "Comparison with other languages" => "man/comparisons.md",
+            "Comparison with other survey analysis tools" => "man/comparisons.md",
             "Future plans" => "man/future.md",
         ],
         "API reference" => "api.md",
diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
index 232dd905..5a5328fd 100644
--- a/docs/src/man/comparisons.md
+++ b/docs/src/man/comparisons.md
@@ -1,6 +1,6 @@
-# Comparison with other languages
+# Comparison with other survey analysis tools
 
-There are multiple languages that offer survey analysis tools, most notably
+There are multiple alternatives that offer survey analysis tools, most notably
 [SAS/STAT](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html)
 and [R](https://CRAN.R-project.org/package=survey).
 

From 0fbaf52761bab3075ff5f24c8f90ece2e1a11dc2 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <84318573+iuliadmtru@users.noreply.github.com>
Date: Sun, 29 Jan 2023 10:39:07 +0200
Subject: [PATCH 74/80] Update docs/src/getting_started.md

Add replicates

Co-authored-by: Ayush Patnaik <u6012645@anu.edu.au>
---
 docs/src/getting_started.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
index 629f10a0..b9020b4f 100644
--- a/docs/src/getting_started.md
+++ b/docs/src/getting_started.md
@@ -74,7 +74,7 @@ need to convert the [`SurveyDesign`](@ref) to a [`ReplicateDesign`](@ref) using
 bootstrapping:
 
 ```@repl tutorial
-bsrs = bootweights(srs)
+bsrs = bootweights(srs; replicates = 1000)
 ```
 
 We do this because [TODO: explain why]. Now we can compute the estimated mean:

From 8237c3f8a823a26d1bc51818d958b76b0d0d6daa Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <84318573+iuliadmtru@users.noreply.github.com>
Date: Sun, 29 Jan 2023 10:43:14 +0200
Subject: [PATCH 75/80] Update docs/src/man/comparisons.md

Add Stata reference
---
 docs/src/man/comparisons.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
index 5a5328fd..f76fd264 100644
--- a/docs/src/man/comparisons.md
+++ b/docs/src/man/comparisons.md
@@ -1,7 +1,8 @@
 # Comparison with other survey analysis tools
 
 There are multiple alternatives that offer survey analysis tools, most notably
-[SAS/STAT](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html)
+[SAS](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html),
+[Stata](https://www.stata.com/features/survey-methods/)
 and [R](https://CRAN.R-project.org/package=survey).
 
 ## R comparison

From 2c8a11ecca32098049b6462e623015860438f5e3 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <84318573+iuliadmtru@users.noreply.github.com>
Date: Sun, 29 Jan 2023 10:43:44 +0200
Subject: [PATCH 76/80] Update docs/src/man/comparisons.md

Change type to "subbootstrap"

Co-authored-by: Ayush Patnaik <u6012645@anu.edu.au>
---
 docs/src/man/comparisons.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
index f76fd264..fa9ffb54 100644
--- a/docs/src/man/comparisons.md
+++ b/docs/src/man/comparisons.md
@@ -44,7 +44,7 @@ clus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # clustered (one sta
 #### Creating a replicate design
 
 ```R
-bsrs = as.svrepdesign(srs, type="bootstrap")
+bsrs = as.svrepdesign(srs, type="subbootstrap")
 ```
 
 ```julia

From 1d4fccb7142362a60bd6989a89c21e7d3ea52087 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <84318573+iuliadmtru@users.noreply.github.com>
Date: Sun, 29 Jan 2023 10:55:07 +0200
Subject: [PATCH 77/80] Update docs/src/man/comparisons.md

Co-authored-by: Ayush Patnaik <u6012645@anu.edu.au>
---
 docs/src/man/comparisons.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
index fa9ffb54..ca0b7716 100644
--- a/docs/src/man/comparisons.md
+++ b/docs/src/man/comparisons.md
@@ -9,7 +9,7 @@ and [R](https://CRAN.R-project.org/package=survey).
 
 The inspiration for `Survey.jl` comes from R. Hence the syntax is in most cases
 very similar to the syntax in the [`survey` package](https://cran.r-project.org/web/packages/survey/survey.pdf)
-from R. To showcase this we will use the `apisrs` dataset found in both R's
+from R. To showcase this we will use the `api` datasets found in both R's
 `survey` and `Survey.jl`. See the [Tutorial](@ref) section for more details about
 the `api` datesets.
 

From 8a5d3672d87a9a5beef10d97b8e7dbaeedb14558 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Sun, 29 Jan 2023 10:54:08 +0200
Subject: [PATCH 78/80] Add `mean` with `SurveyDesign` and rephrase for
 bootstrap

---
 docs/src/getting_started.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
index b9020b4f..e9a6e063 100644
--- a/docs/src/getting_started.md
+++ b/docs/src/getting_started.md
@@ -69,17 +69,19 @@ basic usage of the package. For that, we will stick with a simple random sample.
 Now you can analyse your design according to your needs using the
 [functionality](@ref Index) provided by the package. For example, you can compute
 the estimated mean or population total for a given variable. Let's say we're
-interested in the mean Academic Performance Index from the year 1999. First we
-need to convert the [`SurveyDesign`](@ref) to a [`ReplicateDesign`](@ref) using
-bootstrapping:
+interested in the mean Academic Performance Index from the year 1999. If we are
+only interested in the estimated mean, then we can directly pass our design to
+the [`mean`](@ref) function:
 
 ```@repl tutorial
-bsrs = bootweights(srs; replicates = 1000)
+mean(:api99, srs)
 ```
 
-We do this because [TODO: explain why]. Now we can compute the estimated mean:
+If we also want to know the standard error of the mean, we need to convert the
+[`SurveyDesign`](@ref) to a [`ReplicateDesign`](@ref) using bootstrapping:
 
 ```@repl tutorial
+bsrs = bootweights(srs; replicates = 1000)
 mean(:api99, bsrs)
 ```
 

From ec6e0a3faeaa0bab9897beecc0d8822c3d3e51fa Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Sun, 29 Jan 2023 10:57:38 +0200
Subject: [PATCH 79/80] Minor alignment change

---
 docs/src/man/comparisons.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/man/comparisons.md b/docs/src/man/comparisons.md
index ca0b7716..c04829b8 100644
--- a/docs/src/man/comparisons.md
+++ b/docs/src/man/comparisons.md
@@ -2,8 +2,8 @@
 
 There are multiple alternatives that offer survey analysis tools, most notably
 [SAS](https://support.sas.com/rnd/app/stat/procedures/SurveyAnalysis.html),
-[Stata](https://www.stata.com/features/survey-methods/)
-and [R](https://CRAN.R-project.org/package=survey).
+[Stata](https://www.stata.com/features/survey-methods/) and
+[R](https://CRAN.R-project.org/package=survey).
 
 ## R comparison
 

From b9515abbf46c506b5fbfa6c750124421de90adc5 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Sun, 29 Jan 2023 13:17:52 +0200
Subject: [PATCH 80/80] Change "we" to "you" and minor rephrasing

---
 docs/src/getting_started.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md
index e9a6e063..7495e93b 100644
--- a/docs/src/getting_started.md
+++ b/docs/src/getting_started.md
@@ -68,16 +68,16 @@ basic usage of the package. For that, we will stick with a simple random sample.
 
 Now you can analyse your design according to your needs using the
 [functionality](@ref Index) provided by the package. For example, you can compute
-the estimated mean or population total for a given variable. Let's say we're
-interested in the mean Academic Performance Index from the year 1999. If we are
-only interested in the estimated mean, then we can directly pass our design to
-the [`mean`](@ref) function:
+the estimated mean or population total for a given variable. Let's say you want
+to find the mean Academic Performance Index from the year 1999. If you are only
+interested in the estimated mean, then you can directly pass your design to the
+[`mean`](@ref) function:
 
 ```@repl tutorial
 mean(:api99, srs)
 ```
 
-If we also want to know the standard error of the mean, we need to convert the
+If you also want to know the standard error of the mean, you need to convert the
 [`SurveyDesign`](@ref) to a [`ReplicateDesign`](@ref) using bootstrapping:
 
 ```@repl tutorial
@@ -85,8 +85,8 @@ bsrs = bootweights(srs; replicates = 1000)
 mean(:api99, bsrs)
 ```
 
-We can also find the mean of both the 1999 API and 2000 API for a clear
-comparison between students' performance from one year to another:
+You can find the mean of both the 1999 API and 2000 API for a clear comparison
+between students' performance from one year to another:
 
 ```@repl tutorial
 mean([:api99, :api00], bsrs)
@@ -99,8 +99,8 @@ the two APIs:
 ratio(:api00, :api99, bsrs)
 ```
 
-If we're interested in a certain statistic estimated by a specific domain, we
-can add the domain as the second parameter to our function. Let's say we want
+If you're interested in a certain statistic estimated by a specific domain, you
+can add the domain as the second parameter to your function. Let's say you want
 to find the estimated total number of students enrolled in schools from each
 county:
 
@@ -108,7 +108,7 @@ county:
 total(:enroll, :cname, bsrs)
 ```
 
-Another way to visualize data is through graphs. We can make a histogram to
+Another way to visualize data is through graphs. You can make a histogram to
 better see the distribution of enrolled students:
 
 ```@setup warning