From f9efaea9f7314ae4ed61ff0aec0b9a0c40c11c3a Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Wed, 4 Jan 2023 21:19:10 +0530
Subject: [PATCH 01/11] Fix bug in total.

---
 Project.toml     |  1 +
 src/Survey.jl    |  1 +
 src/bootstrap.jl | 41 +++++++++++++++++++++++++++++++----------
 src/by.jl        |  2 +-
 src/mean.jl      | 12 ++++++------
 src/total.jl     |  2 +-
 6 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/Project.toml b/Project.toml
index 2659ebc2..f288d231 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,6 +10,7 @@ CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
diff --git a/src/Survey.jl b/src/Survey.jl
index f6d3d030..fee13d6d 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -11,6 +11,7 @@ using CairoMakie
 using AlgebraOfGraphics
 using CategoricalArrays
 using Random
+using Missings
 
 include("SurveyDesign.jl")
 include("bootstrap.jl")
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 70df8a81..a28fabe3 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -26,27 +26,48 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
     H = length(unique(design.data[!, design.strata]))
     stratified = groupby(design.data, design.strata)
     function replicate(stratified, H)
-        for j in 1:H
-            substrata = DataFrame(stratified[j])
+        for h in 1:H
+            substrata = DataFrame(stratified[h])
             psus = unique(substrata[!, design.cluster])
-            if length(psus) == 1
-                return DataFrame(statistic = X, SE = 0)
+            # @show psus
+            if length(psus) <= 1
+                return DataFrame(statistic = X, SE = 0) # bug! 
             end
             nh = length(psus)
             randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  
             rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
+            # @show keys(gdf)
             for i in 1:nh
-                gdf[i].rh = repeat([rh[i]], nrow(gdf[i]))
-            end
-            stratified[j].rh = DataFrame(gdf).rh
+                gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i].weights .* (nh / (nh - 1))
+            end            
+            stratified[h].whij = transform(gdf).whij
+            
         end
-        return DataFrame(stratified)
+        return transform(stratified, :whij)
     end
     df = replicate(stratified, H)
-    rename!(df,:rh => :replicate_1)
+    rename!(df,:whij => :replicate_1)
+    df.replicate_1 = disallowmissing(df.replicate_1)
     for i in 2:(replicates)
-        df[!, "replicate_"*string(i)] = Float64.(replicate(stratified, H).rh)
+        df[!, "replicate_"*string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
+end
+
+function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 100, rng = MersenneTwister(1234))
+    gdf = groupby(design.data, design.cluster)
+    psus = unique(design.data[!, design.cluster])
+    nh = length(psus)
+    X = func(design.data[:, x], design.data.weights)
+    Xt = Array{Float64, 1}(undef, replicates)
+    for i in 1:replicates
+        selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
+        xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
+        whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
+        Xt[i] = func(xhij, whij)
+    end 
+    @show Xt
+    variance = sum((Xt .- X).^2) / replicates
+    return DataFrame(statistic = X, SE = sqrt(variance))
 end
\ No newline at end of file
diff --git a/src/by.jl b/src/by.jl
index 30cb2dd2..be26d5a3 100644
--- a/src/by.jl
+++ b/src/by.jl
@@ -4,7 +4,7 @@ function bydomain(x::Symbol, domain::Symbol, design::ReplicateDesign, func::Func
     X = combine(gdf, [x, :weights] => ((a, b) -> func(a, weights(b))) => :statistic)
     Xt_mat = Array{Float64, 2}(undef, (nd, design.replicates))
     for i in 1:design.replicates
-        Xt_mat[:, i] = combine(gdf, [x, :weights, Symbol("replicate_"*string(i))] => ((a, b, c) -> func(a, weights(b .* c))) => :statistic).statistic
+        Xt_mat[:, i] = combine(gdf, [x, Symbol("replicate_"*string(i))] => ((a, c) -> func(a, weights(c))) => :statistic).statistic
     end
     ses = []
     for i in 1:nd
diff --git a/src/mean.jl b/src/mean.jl
index 501230d7..2bf8b925 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -4,21 +4,21 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
 
-julia> bclus1 = bootweights(apiclus1; replicates = 1000)
+julia> bclus1 = bootweights(dclus1; replicates = 1000)
 
 julia> mean(:api00, bclus1)
 1×2 DataFrame
  Row │ mean     SE      
      │ Float64  Float64 
 ─────┼──────────────────
-   1 │ 644.169  23.0897
+   1 │ 644.169  23.7208
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
     X = mean(design.data[!, x], weights(design.data.weights))
-    Xt = [mean(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
+    Xt = [mean(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(mean = X, SE = sqrt(variance))
 end
@@ -28,9 +28,9 @@ julia> using Survey, Random, StatsBase;
 
 julia> apiclus1 = load_data("apiclus1"); 
 
-julia> dclus1 = SurveyDesign(apiclus1, :dnum, :fpc); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
 
-julia> bclus1 = bootweights(apiclus1; replicates = 1000)
+julia> bclus1 = bootweights(dclus1; replicates = 1000)
 
 julia> mean(:api00, :cname, bclus1) |> print
 38×3 DataFrame
diff --git a/src/total.jl b/src/total.jl
index 3a3185c0..fdf83216 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -20,7 +20,7 @@ julia> total(:api00, bclus1)
 """
 function total(x::Symbol, design::ReplicateDesign)
     X = wsum(design.data[!, x], weights(design.data.weights))
-    Xt = [wsum(design.data[!, x], weights(design.data.weights .* design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
+    Xt = [wsum(design.data[!, x], weights(design.data[! , "replicate_"*string(i)])) for i in 1:design.replicates]
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(total = X, SE = sqrt(variance))
 end

From 7a65293550935ef7355a516e3a8deec3985cef7f Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 5 Jan 2023 11:18:08 +0530
Subject: [PATCH 02/11] Remove comments and unused function.

---
 src/bootstrap.jl | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index a28fabe3..9dd605c1 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -29,7 +29,6 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
         for h in 1:H
             substrata = DataFrame(stratified[h])
             psus = unique(substrata[!, design.cluster])
-            # @show psus
             if length(psus) <= 1
                 return DataFrame(statistic = X, SE = 0) # bug! 
             end
@@ -37,7 +36,6 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
             randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  
             rh = [(count(==(i), randinds)) for i in 1:nh] # main bootstrap algo. 
             gdf = groupby(substrata, design.cluster)
-            # @show keys(gdf)
             for i in 1:nh
                 gdf[i].whij = repeat([rh[i]], nrow(gdf[i])) .* gdf[i].weights .* (nh / (nh - 1))
             end            
@@ -53,21 +51,4 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
         df[!, "replicate_"*string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
-end
-
-function bootstrap(x::Symbol, design::SurveyDesign, func = wsum; replicates = 100, rng = MersenneTwister(1234))
-    gdf = groupby(design.data, design.cluster)
-    psus = unique(design.data[!, design.cluster])
-    nh = length(psus)
-    X = func(design.data[:, x], design.data.weights)
-    Xt = Array{Float64, 1}(undef, replicates)
-    for i in 1:replicates
-        selected_psus = psus[rand(rng, 1:nh, (nh-1))] # simple random sample of PSUs, with replacement. Select (nh-1) out of nh
-        xhij = (reduce(vcat, [gdf[(i,)][!, x] for i in selected_psus]))
-        whij = (reduce(vcat, [gdf[(i,)].weights * (nh / (nh - 1)) for i in selected_psus]))
-        Xt[i] = func(xhij, whij)
-    end 
-    @show Xt
-    variance = sum((Xt .- X).^2) / replicates
-    return DataFrame(statistic = X, SE = sqrt(variance))
 end
\ No newline at end of file

From af7ee993962a6c710b42c23c2fb757b09a30d25b Mon Sep 17 00:00:00 2001
From: ayushpatnaikgit <ayushpatnaik@gmail.com>
Date: Thu, 5 Jan 2023 11:27:03 +0530
Subject: [PATCH 03/11] Attemp bug fix.

---
 src/bootstrap.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 9dd605c1..2055b6bf 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -30,7 +30,7 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
             substrata = DataFrame(stratified[h])
             psus = unique(substrata[!, design.cluster])
             if length(psus) <= 1
-                return DataFrame(statistic = X, SE = 0) # bug! 
+                stratified[h].whij .= 0 # hasn't been tested yet. 
             end
             nh = length(psus)
             randinds = rand(rng, 1:(nh), (nh-1)) # Main bootstrap algo. Draw nh-1 out of nh, with replacement.  

From 9c0b68547be12744fa4390e5be8554615f89c4d7 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Mon, 9 Jan 2023 18:11:31 +0530
Subject: [PATCH 04/11] Fix and add tests for SRS for `total`

---
 test/total.jl | 144 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 129 insertions(+), 15 deletions(-)

diff --git a/test/total.jl b/test/total.jl
index 5825a319..4c3788a8 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -5,11 +5,20 @@
     apisrs = copy(apisrs_original)
     srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
-    @test tot.total[1] ≈ 4.06688749e6 atol = 1e-4
-    @test tot.SE[1] ≈ 60518.199 atol = 1e-1
-    # TODO: uncomment after correcting `total` function
-    # @test tot.total[1] ≈ 131317 atol = 1
-    # @test tot.SE[1] ≈ 1880.6 atol = 1e-1
+    @test tot.total[1] ≈ 4066888 rtol = 1e-5
+    @test tot.SE[1] ≈ 58526 rtol = 1e-1
+    mn = mean(:api00, srs)
+    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
+    @test mn.SE[1] ≈ 9.4488 rtol = 1e-1
+    # equivalent R code and results:
+    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
+    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    # > svytotal(~api00, srsrep)
+    #         total    SE
+    # api00 4066888 58526
+    # > svymean(~api00, srsrep)
+    #         mean     SE
+    # api00 656.58 9.4488
 
     # CategoricalArray
     # apisrs = copy(apisrs_original)
@@ -24,24 +33,129 @@
 
     # Vector{Symbol}
     apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total([:api00, :enroll], srs)
+    mn = mean([:api00, :enroll], srs)
     ## :api00
-    @test tot.total[1] ≈ 4066888 atol = 1
-    @test tot.SE[1] ≈ 60518.199 atol = 1
+    @test tot.total[1] ≈ 4066888 rtol = 1e-5
+    @test tot.SE[1] ≈ 57502 rtol = 1e-1
+    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
+    @test mn.SE[1] ≈ 9.2835 rtol = 1e-1
     ## :enroll
-    @test tot.total[2] ≈ 3621074 atol = 1
-    @test tot.SE[2] ≈ 173784.343 atol = 1
+    @test tot.total[2] ≈ 3621074 rtol = 1e-5
+    @test tot.SE[2] ≈ 176793 rtol = 1e-1
+    @test mn.mean[2] ≈ 584.61 rtol = 1e-5
+    @test mn.SE[2] ≈ 28.5427 rtol = 1e-1
+    # equivalent R code and results:
+    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
+    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    # > svytotal(~api00+~enroll, srsrep)
+    #         total     SE
+    # api00  4066888  57502
+    # enroll 3621074 176793
+    # > svymean(~api00+~enroll, srsrep)
+    #         mean      SE
+    # api00  656.58  9.2835
+    # enroll 584.61 28.5427
 
     # subpopulation
     apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; popsize = :fpc) |> bootweights
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, :cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 atol = 1e-2
-    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122366.33 atol = 1e-2
-    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 atol = 1e-2
-    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38178.35 atol = 1e-2
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 rtol = 1e-5
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122193.02 rtol = 1e-1
+    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 rtol = 1e-5
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38862.71 rtol = 1e-1
+    mn = mean(:api00, :cname, srs)
+    @test size(mn)[1] == apisrs.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 658.1556 rtol = 1e-5
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 2.126852e+01 rtol = 1e-1
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 718.2857 rtol = 1e-5
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 5.835346e+01 rtol = 1e-1
+    # equivalent R code and results:
+    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
+    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    # > svyby(~api00, ~cname, srsrep, svytotal)
+    #                         cname    api00           se
+    # Alameda                 Alameda 230323.89  67808.91
+    # Calaveras             Calaveras  24466.30  24199.26
+    # Contra Costa       Contra Costa 213538.15  68780.65
+    # Fresno                   Fresno 148717.94  54174.78
+    # Imperial               Imperial  19263.34  19292.34
+    # Kern                       Kern 177643.92  56429.75
+    # Kings                     Kings  29080.83  20659.88
+    # Lake                       Lake  24899.88  24796.24
+    # Lassen                   Lassen  23289.44  23150.91
+    # Los Angeles         Los Angeles 917238.49 122193.02
+    # Madera                   Madera  44596.80  25684.62
+    # Marin                     Marin  74297.03  43018.64
+    # Merced                   Merced  18427.15  18057.21
+    # Modoc                     Modoc  20780.87  20977.35
+    # Monterey               Monterey  74947.40  38862.71
+    # Napa                       Napa  45030.38  31747.05
+    # Orange                   Orange 208861.68  66824.94
+    # Placer                   Placer  23506.23  23426.32
+    # Riverside             Riverside 177860.71  55697.57
+    # Sacramento           Sacramento 152620.16  53266.09
+    # San Bernardino   San Bernardino 247388.36  66806.58
+    # San Diego             San Diego 254387.58  71730.93
+    # San Francisco     San Francisco  51874.75  29597.88
+    # San Joaquin         San Joaquin 113102.44  46195.42
+    # San Luis Obispo San Luis Obispo  22886.83  22984.23
+    # San Mateo             San Mateo  38216.98  27075.67
+    # Santa Barbara     Santa Barbara  67700.42  38550.72
+    # Santa Clara         Santa Clara 155717.16  58101.15
+    # Santa Cruz           Santa Cruz  58006.81  34633.27
+    # Shasta                   Shasta  46702.76  32882.09
+    # Siskiyou               Siskiyou  21648.03  21667.03
+    # Solano                   Solano  57882.93  33095.96
+    # Sonoma                   Sonoma  19511.10  19782.71
+    # Stanislaus           Stanislaus  68412.73  39997.43
+    # Sutter                   Sutter  23041.68  22738.16
+    # Tulare                   Tulare  41128.16  28933.90
+    # Ventura                 Ventura 115177.43  51200.56
+    # Yolo                       Yolo  14710.75  14676.49
+    # > svyby(~api00, ~cname, srsrep, svymean)
+    #                         cname    api00           se
+    # Alameda                 Alameda 676.0909 3.522082e+01
+    # Calaveras             Calaveras 790.0000 0.000000e+00
+    # Contra Costa       Contra Costa 766.1111 5.435054e+01
+    # Fresno                   Fresno 600.2500 5.811781e+01
+    # Imperial               Imperial 622.0000 0.000000e+00
+    # Kern                       Kern 573.6000 4.634744e+01
+    # Kings                     Kings 469.5000 4.264356e+01
+    # Lake                       Lake 804.0000 0.000000e+00
+    # Lassen                   Lassen 752.0000 0.000000e+00
+    # Los Angeles         Los Angeles 658.1556 2.126852e+01
+    # Madera                   Madera 480.0000 3.461786e+00
+    # Marin                     Marin 799.6667 3.509912e+01
+    # Merced                   Merced 595.0000 0.000000e+00
+    # Modoc                     Modoc 671.0000 0.000000e+00
+    # Monterey               Monterey 605.0000 8.356655e+01
+    # Napa                       Napa 727.0000 4.770914e+01
+    # Orange                   Orange 749.3333 2.876956e+01
+    # Placer                   Placer 759.0000 0.000000e+00
+    # Riverside             Riverside 574.3000 2.789294e+01
+    # Sacramento           Sacramento 616.0000 3.785063e+01
+    # San Bernardino   San Bernardino 614.4615 2.985197e+01
+    # San Diego             San Diego 684.5000 3.254291e+01
+    # San Francisco     San Francisco 558.3333 4.404227e+01
+    # San Joaquin         San Joaquin 608.6667 4.153241e+01
+    # San Luis Obispo San Luis Obispo 739.0000 2.691382e-14
+    # San Mateo             San Mateo 617.0000 7.352923e+01
+    # Santa Barbara     Santa Barbara 728.6667 2.551393e+01
+    # Santa Clara         Santa Clara 718.2857 5.835346e+01
+    # Santa Cruz           Santa Cruz 624.3333 1.131098e+02
+    # Shasta                   Shasta 754.0000 5.731963e+01
+    # Siskiyou               Siskiyou 699.0000 0.000000e+00
+    # Solano                   Solano 623.0000 4.541173e+01
+    # Sonoma                   Sonoma 630.0000 0.000000e+00
+    # Stanislaus           Stanislaus 736.3333 5.176843e+00
+    # Sutter                   Sutter 744.0000 0.000000e+00
+    # Tulare                   Tulare 664.0000 2.061011e+01
+    # Ventura                 Ventura 743.8000 3.153839e+01
+    # Yolo                       Yolo 475.0000 0.000000e+00
 end
 
 @testset "total_Stratified" begin

From a5f3a4e13e858041b8ea4c2dd3460fcc014eae43 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Mon, 9 Jan 2023 18:44:17 +0530
Subject: [PATCH 05/11] Fix and add tests for Stratified, add constants for
 tolerances

---
 test/total.jl | 213 ++++++++++++++++----------------------------------
 1 file changed, 69 insertions(+), 144 deletions(-)

diff --git a/test/total.jl b/test/total.jl
index 4c3788a8..9b7c8e56 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -1,3 +1,6 @@
+const STAT_TOL = 1e-5
+const SE_TOL = 1e-1
+
 @testset "Simple random sample" begin
     apisrs_original = load_data("apisrs")
 
@@ -5,11 +8,11 @@
     apisrs = copy(apisrs_original)
     srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
-    @test tot.total[1] ≈ 4066888 rtol = 1e-5
-    @test tot.SE[1] ≈ 58526 rtol = 1e-1
+    @test tot.total[1] ≈ 4066888 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 58526 rtol = SE_TOL
     mn = mean(:api00, srs)
-    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
-    @test mn.SE[1] ≈ 9.4488 rtol = 1e-1
+    @test mn.mean[1] ≈ 656.58 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.4488 rtol = SE_TOL
     # equivalent R code and results:
     # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
     # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
@@ -20,35 +23,20 @@
     #         mean     SE
     # api00 656.58 9.4488
 
-    # CategoricalArray
-    # apisrs = copy(apisrs_original)
-    # apisrs[!, :cname] = CategoricalArrays.categorical(apisrs.cname)
-    # srs = SurveyDesign(apisrs; popsize = :fpc)
-    # tot = total(:cname, srs)
-    # @test size(tot)[1] == apisrs.cname |> unique |> length
-    # @test filter(:cname => ==("Alameda"), tot).total[1] ≈ 340.67 atol = 1e-2
-    # @test filter(:cname => ==("Alameda"), tot).SE[1] ≈ 98.472 atol = 1e-3
-    # @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 1393.65 atol = 1e-2
-    # @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 180.368 atol = 1e-3
-
     # Vector{Symbol}
-    apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total([:api00, :enroll], srs)
     mn = mean([:api00, :enroll], srs)
     ## :api00
-    @test tot.total[1] ≈ 4066888 rtol = 1e-5
-    @test tot.SE[1] ≈ 57502 rtol = 1e-1
-    @test mn.mean[1] ≈ 656.58 rtol = 1e-5
-    @test mn.SE[1] ≈ 9.2835 rtol = 1e-1
+    @test tot.total[1] ≈ 4066888 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 57502 rtol = SE_TOL
+    @test mn.mean[1] ≈ 656.58 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.2835 rtol = SE_TOL
     ## :enroll
-    @test tot.total[2] ≈ 3621074 rtol = 1e-5
-    @test tot.SE[2] ≈ 176793 rtol = 1e-1
-    @test mn.mean[2] ≈ 584.61 rtol = 1e-5
-    @test mn.SE[2] ≈ 28.5427 rtol = 1e-1
+    @test tot.total[2] ≈ 3621074 rtol = STAT_TOL
+    @test tot.SE[2] ≈ 176793 rtol = SE_TOL
+    @test mn.mean[2] ≈ 584.61 rtol = STAT_TOL
+    @test mn.SE[2] ≈ 28.5427 rtol = SE_TOL
     # equivalent R code and results:
-    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
-    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
     # > svytotal(~api00+~enroll, srsrep)
     #         total     SE
     # api00  4066888  57502
@@ -59,146 +47,83 @@
     # enroll 584.61 28.5427
 
     # subpopulation
-    apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, :cname, srs)
     @test size(tot)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 rtol = 1e-5
-    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122193.02 rtol = 1e-1
-    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 rtol = 1e-5
-    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38862.71 rtol = 1e-1
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 917238.49 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 122193.02 rtol = SE_TOL
+    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 74947.40 rtol = STAT_TOL
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 38862.71 rtol = SE_TOL
     mn = mean(:api00, :cname, srs)
     @test size(mn)[1] == apisrs.cname |> unique |> length
-    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 658.1556 rtol = 1e-5
-    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 2.126852e+01 rtol = 1e-1
-    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 718.2857 rtol = 1e-5
-    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 5.835346e+01 rtol = 1e-1
-    # equivalent R code and results:
-    # > srs <- svydesign(data=apisrs, id=~1, weights=~pw)
-    # > srsrep <- as.svrepdesign(srs, type="bootstrap", replicates=4000)
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 658.1556 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 2.126852e+01 rtol = SE_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 718.2857 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 5.835346e+01 rtol = SE_TOL
+    # equivalent R code (results cause clutter):
     # > svyby(~api00, ~cname, srsrep, svytotal)
-    #                         cname    api00           se
-    # Alameda                 Alameda 230323.89  67808.91
-    # Calaveras             Calaveras  24466.30  24199.26
-    # Contra Costa       Contra Costa 213538.15  68780.65
-    # Fresno                   Fresno 148717.94  54174.78
-    # Imperial               Imperial  19263.34  19292.34
-    # Kern                       Kern 177643.92  56429.75
-    # Kings                     Kings  29080.83  20659.88
-    # Lake                       Lake  24899.88  24796.24
-    # Lassen                   Lassen  23289.44  23150.91
-    # Los Angeles         Los Angeles 917238.49 122193.02
-    # Madera                   Madera  44596.80  25684.62
-    # Marin                     Marin  74297.03  43018.64
-    # Merced                   Merced  18427.15  18057.21
-    # Modoc                     Modoc  20780.87  20977.35
-    # Monterey               Monterey  74947.40  38862.71
-    # Napa                       Napa  45030.38  31747.05
-    # Orange                   Orange 208861.68  66824.94
-    # Placer                   Placer  23506.23  23426.32
-    # Riverside             Riverside 177860.71  55697.57
-    # Sacramento           Sacramento 152620.16  53266.09
-    # San Bernardino   San Bernardino 247388.36  66806.58
-    # San Diego             San Diego 254387.58  71730.93
-    # San Francisco     San Francisco  51874.75  29597.88
-    # San Joaquin         San Joaquin 113102.44  46195.42
-    # San Luis Obispo San Luis Obispo  22886.83  22984.23
-    # San Mateo             San Mateo  38216.98  27075.67
-    # Santa Barbara     Santa Barbara  67700.42  38550.72
-    # Santa Clara         Santa Clara 155717.16  58101.15
-    # Santa Cruz           Santa Cruz  58006.81  34633.27
-    # Shasta                   Shasta  46702.76  32882.09
-    # Siskiyou               Siskiyou  21648.03  21667.03
-    # Solano                   Solano  57882.93  33095.96
-    # Sonoma                   Sonoma  19511.10  19782.71
-    # Stanislaus           Stanislaus  68412.73  39997.43
-    # Sutter                   Sutter  23041.68  22738.16
-    # Tulare                   Tulare  41128.16  28933.90
-    # Ventura                 Ventura 115177.43  51200.56
-    # Yolo                       Yolo  14710.75  14676.49
     # > svyby(~api00, ~cname, srsrep, svymean)
-    #                         cname    api00           se
-    # Alameda                 Alameda 676.0909 3.522082e+01
-    # Calaveras             Calaveras 790.0000 0.000000e+00
-    # Contra Costa       Contra Costa 766.1111 5.435054e+01
-    # Fresno                   Fresno 600.2500 5.811781e+01
-    # Imperial               Imperial 622.0000 0.000000e+00
-    # Kern                       Kern 573.6000 4.634744e+01
-    # Kings                     Kings 469.5000 4.264356e+01
-    # Lake                       Lake 804.0000 0.000000e+00
-    # Lassen                   Lassen 752.0000 0.000000e+00
-    # Los Angeles         Los Angeles 658.1556 2.126852e+01
-    # Madera                   Madera 480.0000 3.461786e+00
-    # Marin                     Marin 799.6667 3.509912e+01
-    # Merced                   Merced 595.0000 0.000000e+00
-    # Modoc                     Modoc 671.0000 0.000000e+00
-    # Monterey               Monterey 605.0000 8.356655e+01
-    # Napa                       Napa 727.0000 4.770914e+01
-    # Orange                   Orange 749.3333 2.876956e+01
-    # Placer                   Placer 759.0000 0.000000e+00
-    # Riverside             Riverside 574.3000 2.789294e+01
-    # Sacramento           Sacramento 616.0000 3.785063e+01
-    # San Bernardino   San Bernardino 614.4615 2.985197e+01
-    # San Diego             San Diego 684.5000 3.254291e+01
-    # San Francisco     San Francisco 558.3333 4.404227e+01
-    # San Joaquin         San Joaquin 608.6667 4.153241e+01
-    # San Luis Obispo San Luis Obispo 739.0000 2.691382e-14
-    # San Mateo             San Mateo 617.0000 7.352923e+01
-    # Santa Barbara     Santa Barbara 728.6667 2.551393e+01
-    # Santa Clara         Santa Clara 718.2857 5.835346e+01
-    # Santa Cruz           Santa Cruz 624.3333 1.131098e+02
-    # Shasta                   Shasta 754.0000 5.731963e+01
-    # Siskiyou               Siskiyou 699.0000 0.000000e+00
-    # Solano                   Solano 623.0000 4.541173e+01
-    # Sonoma                   Sonoma 630.0000 0.000000e+00
-    # Stanislaus           Stanislaus 736.3333 5.176843e+00
-    # Sutter                   Sutter 744.0000 0.000000e+00
-    # Tulare                   Tulare 664.0000 2.061011e+01
-    # Ventura                 Ventura 743.8000 3.153839e+01
-    # Yolo                       Yolo 475.0000 0.000000e+00
 end
 
-@testset "total_Stratified" begin
+@testset "Stratified sample" begin
     apistrat_original = load_data("apistrat")
 
     # base functionality
     apistrat = copy(apistrat_original)
     strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
     tot = total(:api00, strat)
-    @test tot.total[1] ≈ 4102208 atol = 10
-    @test tot.SE[1] ≈ 77211.61 atol = 1e-1
-    # without fpc
-    # TODO: uncomment after correcting `total` function
-    # @test tot.SE[1] ≈ 1690.4 atol = 1e-1
-
-    # CategoricalArray
-    # apistrat = copy(apistrat_original)
-    # apistrat[!, :cname] = CategoricalArrays.categorical(apistrat.cname)
-    # strat = StratifiedSample(apistrat, :stype; popsize = :fpc)
-    # TODO: uncomment after adding `CategoricalArray` support
-    # @test tot.SE[1] ≈ 1690.4 atol = 1e-1
-    # tot = total(:cname, strat)
-    # @test size(tot)[1] == apistrat.cname |> unique |> length
-    # @test filter(:cname => ==("Kern"), tot).total[1] ≈ 291.97 atol = 1e-2
-    # @test filter(:cname => ==("Kern"), tot).SE[1] ≈ 101.760 atol = 1e-3
-    # @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 1373.15 atol = 1e-2
-    # @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 199.635 atol = 1e-3
+    @test tot.total[1] ≈ 4102208 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 60746 rtol = SE_TOL
+    @test mn.mean[1] ≈ 662.29 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.8072 rtol = SE_TOL
+    # equivalent R code and results:
+    # > strat <- svydesign(data=apistrat, id=~1, weights=~pw, strata=~stype)
+    # > stratrep <- as.svrepdesign(strat, type="bootstrap", replicates=4000)
+    # > svytotal(~api00, stratrep)
+    #     total    SE
+    # api00 4102208 60746
+    # > svymean(~api00, stratrep)
+    #     mean     SE
+    # api00 662.29 9.8072
 
     # Vector{Symbol}
     tot = total([:api00, :enroll], strat)
+    mn = mean([:api00, :enroll], strat)
     ## :api00
-    @test tot.total[1] ≈ 4102208 atol = 1
-    @test tot.SE[1] ≈ 77211.61 atol = 1
+    @test tot.total[1] ≈ 4102208 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 60746 rtol = SE_TOL
+    @test mn.mean[1] ≈ 662.29 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 9.8072 rtol = SE_TOL
     ## :enroll
-    @test tot.total[2] ≈ 3687178 atol = 1
-    @test tot.SE[2] ≈ 127021.5540 atol = 1
+    @test tot.total[2] ≈ 3687178 rtol = STAT_TOL
+    @test tot.SE[2] ≈ 117322 rtol = SE_TOL
+    @test mn.mean[2] ≈ 595.28 rtol = STAT_TOL
+    @test mn.SE[2] ≈ 18.9412 rtol = SE_TOL
+    # equivalent R code and results:
+    # > svytotal(~api00+~enroll, stratrep)
+    # > svymean(~api00+~enroll, stratrep)
+    #         mean      SE
+    # api00  662.29  9.8072
+    # enroll 595.28 18.9412
 
     # subpopulation
-    # TODO: add functionality in `src/total.jl`
-    # TODO: add tests
+    tot = total(:api00, :cname, strat)
+    @test size(tot)[1] == apistrat.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 869905.98 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 134195.81 rtol = SE_TOL
+    @test filter(:cname => ==("Monterey"), tot).total[1] ≈ 72103.09 rtol = STAT_TOL
+    @test filter(:cname => ==("Monterey"), tot).SE[1] ≈ 45532.88 rtol = SE_TOL
+    mn = mean(:api00, :cname, strat)
+    @test size(mn)[1] == apistrat.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 633.5113 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 21.681068 rtol = SE_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 664.1212 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 48.817277 rtol = SE_TOL
+    # equivalent R code (results cause clutter):
+    # > svyby(~api00, ~cname, stratrep, svytotal)
+    # > svyby(~api00, ~cname, stratrep, svymean)
 end
 
-@testset "total_OneStageClusterSample" begin
+@testset "One stage cluster sample" begin
     # Load API datasets
     apiclus1_original = load_data("apiclus1")
     apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column

From 87c11dc01c23e55da98dcfd2cfe6dc58b573d72f Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Mon, 9 Jan 2023 19:55:15 +0530
Subject: [PATCH 06/11] Fix and add tests for Cluster and minor reordering

---
 test/total.jl | 87 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/test/total.jl b/test/total.jl
index 9b7c8e56..6ac6ab06 100644
--- a/test/total.jl
+++ b/test/total.jl
@@ -1,12 +1,11 @@
 const STAT_TOL = 1e-5
 const SE_TOL = 1e-1
 
-@testset "Simple random sample" begin
-    apisrs_original = load_data("apisrs")
+@testset "total SRS" begin
+    apisrs = load_data("apisrs")
+    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
 
     # base functionality
-    apisrs = copy(apisrs_original)
-    srs = SurveyDesign(apisrs; weights = :pw) |> bootweights
     tot = total(:api00, srs)
     @test tot.total[1] ≈ 4066888 rtol = STAT_TOL
     @test tot.SE[1] ≈ 58526 rtol = SE_TOL
@@ -64,15 +63,15 @@ const SE_TOL = 1e-1
     # > svyby(~api00, ~cname, srsrep, svymean)
 end
 
-@testset "Stratified sample" begin
-    apistrat_original = load_data("apistrat")
+@testset "total Stratified" begin
+    apistrat = load_data("apistrat")
+    strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
 
     # base functionality
-    apistrat = copy(apistrat_original)
-    strat = SurveyDesign(apistrat; strata = :stype, weights = :pw) |> bootweights
     tot = total(:api00, strat)
     @test tot.total[1] ≈ 4102208 rtol = STAT_TOL
     @test tot.SE[1] ≈ 60746 rtol = SE_TOL
+    mn = mean(:api00, strat)
     @test mn.mean[1] ≈ 662.29 rtol = STAT_TOL
     @test mn.SE[1] ≈ 9.8072 rtol = SE_TOL
     # equivalent R code and results:
@@ -123,18 +122,64 @@ end
     # > svyby(~api00, ~cname, stratrep, svymean)
 end
 
-@testset "One stage cluster sample" begin
-    # Load API datasets
-    apiclus1_original = load_data("apiclus1")
-    apiclus1_original[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
-    ##############################
-    # one-stage cluster sample
-    apiclus1 = copy(apiclus1_original)
-    dclus1 = SurveyDesign(apiclus1, clusters = :dnum, weights = :pw) |> bootweights
-    @test total(:api00,dclus1).total[1] ≈ 5949162 atol = 1
-    @test total(:api00,dclus1).SE[1] ≈ 1.3338978891316957e6 atol = 1
+@testset "total Cluster" begin
+    apiclus1 = load_data("apiclus1")
+    clus1 = SurveyDesign(apiclus1, clusters = :dnum, weights = :pw) |> bootweights
+
+    # base functionality
+    tot = total(:api00, clus1)
+    @test tot.total[1] ≈ 3989986 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 900323 rtol = SE_TOL
+    mn = mean(:api00, clus1)
+    @test mn.mean[1] ≈ 644.17 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 23.534 rtol = SE_TOL
+    # equivalent R code and results:
+    # > clus1 <- svydesign(data=apiclus1, id=~dnum, weights=~pw)
+    # > clus1rep <- as.svrepdesign(clus1, type="bootstrap", replicates=4000)
+    # > svytotal(~api00, clus1rep)
+    #         total     SE
+    # api00 3989986 900323
+    # > svymean(~api00, clus1rep)
+    #         mean     SE
+    # api00 644.17 23.534
 
-    @test total(:api00, dclus1).total[1] ≈ 5949162 atol = 1
-    @test total(:api00, dclus1).SE[1] ≈ 1352953 atol = 50000 # without fpc as it hasn't been figured out for bootstrap. 
-    
+    # Vector{Symbol}
+    tot = total([:api00, :enroll], clus1)
+    mn = mean([:api00, :enroll], clus1)
+    ## :api00
+    @test tot.total[1] ≈ 3989986 rtol = STAT_TOL
+    @test tot.SE[1] ≈ 900323 rtol = SE_TOL
+    @test mn.mean[1] ≈ 644.17 rtol = STAT_TOL
+    @test mn.SE[1] ≈ 23.534 rtol = SE_TOL
+    ## :enroll
+    @test tot.total[2] ≈ 3404940 rtol = STAT_TOL
+    @test tot.SE[2] ≈ 941501 rtol = SE_TOL
+    @test mn.mean[2] ≈ 549.72 rtol = STAT_TOL
+    @test mn.SE[2] ≈ 46.070 rtol = SE_TOL
+    # equivalent R code and results:
+    # > svytotal(~api00+~enroll, clus1rep)
+    #     total     SE
+    # api00  3989986 900323
+    # enroll 3404940 941501
+    # > svymean(~api00+~enroll, clus1rep)
+    #     mean     SE
+    # api00  644.17 23.534
+    # enroll 549.72 46.070
+
+    # subpopulation
+    tot = total(:api00, :cname, clus1)
+    @test size(tot)[1] == apiclus1.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), tot).total[1] ≈ 328620.49 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), tot).SE[1] ≈ 292840.83 rtol = SE_TOL
+    @test filter(:cname => ==("San Diego"), tot).total[1] ≈ 1227596.71 rtol = STAT_TOL
+    @test filter(:cname => ==("San Diego"), tot).SE[1] ≈ 860028.39 rtol = SE_TOL
+    mn = mean(:api00, :cname, clus1)
+    @test size(mn)[1] == apiclus1.cname |> unique |> length
+    @test filter(:cname => ==("Los Angeles"), mn).mean[1] ≈ 647.2667 rtol = STAT_TOL
+    @test filter(:cname => ==("Los Angeles"), mn).SE[1] ≈ 41.537132 rtol = 1 # tolerance is too large
+    @test filter(:cname => ==("Santa Clara"), mn).mean[1] ≈ 732.0769 rtol = STAT_TOL
+    @test filter(:cname => ==("Santa Clara"), mn).SE[1] ≈ 52.336574 rtol = SE_TOL
+    # equivalent R code (results cause clutter):
+    # > svyby(~api00, ~cname, clus1rep, svytotal)
+    # > svyby(~api00, ~cname, clus1rep, svymean)
 end
\ No newline at end of file

From 81b77e8c55fd3020585354615fa1358bb24476aa Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 16:52:42 +0530
Subject: [PATCH 07/11] Remove references to old designs, add references to new
 functions

---
 docs/src/api.md   | 29 +++++++++--------------------
 docs/src/index.md | 41 +++++------------------------------------
 src/Survey.jl     |  5 ++---
 3 files changed, 16 insertions(+), 59 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 062d379d..5431b9ae 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -7,27 +7,19 @@ Module = [Survey]
 Order = [:type, :function]
 Private = false
 ```
-Survey data can be loaded from a `DataFrame` into a survey design. The package currently supports simple random sample and stratified sample designs. 
-```@docs
-AbstractSurveyDesign
-SimpleRandomSample
-StratifiedSample
-```
 
 ```@docs
+AbstractSurveyDesign
+SurveyDesign
+ReplicateDesign
 load_data
-Survey.mean(x::Symbol, design::SimpleRandomSample)
-total(x::Symbol, design::SimpleRandomSample)
+bootweights
+mean(x::Symbol, design::ReplicateDesign)
+mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
+total(x::Symbol, design::ReplicateDesign)
+total(x::Symbol, domain::Symbol, design::ReplicateDesign)
 quantile
-```
-
-It is often required to estimate population parameters for sub-populations of interest. For example, you may have a sample of heights, but you want the average heights of males and females separately. 
-```@docs
-mean(x::Symbol, by::Symbol, design::SimpleRandomSample) 
-total(x::Symbol, by::Symbol, design::SimpleRandomSample) 
-```
-```@docs
-ratio(variable_num:: Symbol, variable_den:: Symbol, design::SurveyDesign)
+ratio(variable_num::Symbol, variable_den::Symbol, design::SurveyDesign)
 plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 hist(design::AbstractSurveyDesign, var::Symbol,
@@ -35,7 +27,4 @@ hist(design::AbstractSurveyDesign, var::Symbol,
 				 normalization = :density,
 				 kwargs...
     			)
-dim(design::AbstractSurveyDesign)
-dimnames(design::AbstractSurveyDesign)
-colnames(design::AbstractSurveyDesign)
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
index a099c73c..eddbcf0f 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -18,45 +18,14 @@ with at least 100 students and for various probability samples of the data.
 The API program has been discontinued at the end of 2018. Information is archived at
 [https://www.cde.ca.gov/re/pr/api.asp](https://www.cde.ca.gov/re/pr/api.asp)
 
-Firstly, a survey design needs a dataset from which to gather information. 
-
-
-The sample datasets provided with the package can be loaded as `DataFrames` using the `load_data` function:
+Firstly, a survey design needs a dataset from which to gather information. The sample
+datasets provided with the package can be loaded as `DataFrame`s using [`load_data`](@ref):
 
 ```julia
 julia> apisrs = load_data("apisrs");
 ```
-`apisrs` is a simple random sample of the Academic Performance Index of Californian schools.
-
-Next, we can build a design. The design corresponding to a simple random sample is [`SimpleRandomSample`](@ref), which can be instantiated by calling the constructor:
-
-```julia
-julia> srs = SimpleRandomSample(apisrs; weights = :pw)
-SimpleRandomSample:
-data: 200x42 DataFrame
-weights: 31.0, 31.0, 31.0, ..., 31.0
-probs: 0.0323, 0.0323, 0.0323, ..., 0.0323
-fpc: 6194, 6194, 6194, ..., 6194
-popsize: 6194
-sampsize: 200
-sampfraction: 0.0323
-ignorefpc: false
-```
 
-With a `SimpleRandomSample` (as well as with any subtype of [`AbstractSurveyDesign`](@ref)) it is possible to calculate estimates of the mean, population total, etc., for a given variable, along with the corresponding standard errors.
+`apisrs` is a simple random sample of the Academic Performance Index of Californian schools.
 
-```julia
-julia> mean(:api00, srs)
-1×2 DataFrame
- Row │ mean     sem     
-     │ Float64  Float64 
-─────┼──────────────────
-   1 │ 656.585  9.24972
-
-julia> total(:api00, srs)
-1×2 DataFrame
- Row │ total      se_total 
-     │ Float64    Float64  
-─────┼─────────────────────
-   1 │ 4.06689e6   57292.8
-```
+Next, we can build a design.
+#TODO: continue tutorial
diff --git a/src/Survey.jl b/src/Survey.jl
index fee13d6d..dd71a092 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -28,12 +28,11 @@ include("ratio.jl")
 include("by.jl")
 
 export load_data
-export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample
-export SurveyDesign
+export AbstractSurveyDesign, SurveyDesign, ReplicateDesign
 export dim, colnames, dimnames
 export mean, total, quantile
 export plot
-export hist
+export hist, sturges, freedman_diaconis
 export boxplot
 export bootweights
 export jkknife

From 751813822326ea0e8001b0f42314dfc5e16795d7 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 16:57:01 +0530
Subject: [PATCH 08/11] Fix docstring and minor style modifications

---
 src/bootstrap.jl | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/bootstrap.jl b/src/bootstrap.jl
index 2055b6bf..b4e226a8 100644
--- a/src/bootstrap.jl
+++ b/src/bootstrap.jl
@@ -1,15 +1,15 @@
 """
 ```jldoctest
-julia> using Survey, Random;
+julia> using Random
 
-julia> apiclus1 = load_data("apiclus1"); 
+julia> apiclus1 = load_data("apiclus1");
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum); 
+julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum);
 
-julia> rng = MersenneTwister(111); 
+julia> rng = MersenneTwister(111);
 
-julia> Survey.bootweights(dclus1; replicates=1000, rng) 
-Survey.ReplicateDesign:
+julia> bootweights(dclus1; replicates=1000, rng)
+ReplicateDesign:
 data: 183x1046 DataFrame
 cluster: dnum
 design.data[!,design.cluster]: 637, 637, 637, ..., 448
@@ -22,7 +22,7 @@ design.data[!,:allprobs]: 1.0, 1.0, 1.0, ..., 1.0
 replicates: 1000
 ```
 """
-function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwister(1234))
+function bootweights(design::SurveyDesign; replicates=4000, rng=MersenneTwister(1234))
     H = length(unique(design.data[!, design.strata]))
     stratified = groupby(design.data, design.strata)
     function replicate(stratified, H)
@@ -45,10 +45,10 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
         return transform(stratified, :whij)
     end
     df = replicate(stratified, H)
-    rename!(df,:whij => :replicate_1)
+    rename!(df, :whij => :replicate_1)
     df.replicate_1 = disallowmissing(df.replicate_1)
     for i in 2:(replicates)
-        df[!, "replicate_"*string(i)] = disallowmissing(replicate(stratified, H).whij)
+        df[!, "replicate_" * string(i)] = disallowmissing(replicate(stratified, H).whij)
     end 
     return ReplicateDesign(df, design.cluster, design.popsize, design.sampsize, design.strata, design.pps, replicates) 
 end
\ No newline at end of file

From efe2f6a32db2c9fddfc61135ec7baf54aa379547 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 17:02:57 +0530
Subject: [PATCH 09/11] Convert indentation to spaces

---
 docs/src/api.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 5431b9ae..5b538a55 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -23,8 +23,8 @@ ratio(variable_num::Symbol, variable_den::Symbol, design::SurveyDesign)
 plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...)
 hist(design::AbstractSurveyDesign, var::Symbol,
-				 bins::Union{Integer, AbstractVector} = freedman_diaconis(design, var);
-				 normalization = :density,
-				 kwargs...
-    			)
+                 bins::Union{Integer, AbstractVector} = freedman_diaconis(design, var);
+                 normalization = :density,
+                 kwargs...
+                )
 ```

From 2bac93875eaa85a5a16fb06cbb2fa513d16684a0 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 17:16:22 +0530
Subject: [PATCH 10/11] Fix docstrings, minor rearrangements and style checks

---
 src/SurveyDesign.jl |  65 ++++++++++++++------------
 src/mean.jl         | 108 ++++++++++++++++++--------------------------
 src/total.jl        |  81 ++++++++++++++++++---------------
 3 files changed, 124 insertions(+), 130 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 1541a3b9..07b9e1de 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -13,35 +13,32 @@ abstract type AbstractSurveyDesign end
 """
     SurveyDesign <: AbstractSurveyDesign
 
-Survey design sampled by one stage clusters sampling.
-Clusters chosen by SRS followed by complete sampling of selected clusters.
-Assumes each individual in one and only one clusters; disjoint and nested clusters.
+General survey design encompassing a simple random, stratified, cluster or multi-stage design.
 
-`clusters` must be specified as a Symbol name of a column in `data`.
+In the case of cluster sample, the clusters are chosen by simple random sampling. All
+individuals in one cluster are sampled. The clusters are considered disjoint and nested.
 
 # Arguments:
 `data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`clusters::Symbol`: the stratification variable - must be given as a column in `data`.
-`popsize::Union{Nothing,Symbol,<:Unsigned,Vector{<:Real}}=nothing`: the (expected) survey population size. For 
-
-`weights::Union{Nothing,Symbol,Vector{<:Real}}=nothing`: the sampling weights.
+`strata::Union{Nothing, Symbol}=nothing`: the stratification variable - must be given as a column in `data`.
+`clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable - must be given as column(s) in `data`.
+`weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
+`popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
 
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
-
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
+julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
 SurveyDesign:
 data: 183x46 DataFrame
 cluster: dnum
 design.data[!,design.cluster]: 637, 637, 637, ..., 448
 popsize: popsize
-design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
+design.data[!,design.popsize]: 6190.0, 6190.0, 6190.0, ..., 6190.0
 sampsize: sampsize
 design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+design.data[!,:probs]: 0.0295, 0.0295, 0.0295, ..., 0.0295
+design.data[!,:allprobs]: 0.0295, 0.0295, 0.0295, ..., 0.0295
 ```
 """
 struct SurveyDesign <: AbstractSurveyDesign
@@ -52,9 +49,15 @@ struct SurveyDesign <: AbstractSurveyDesign
     strata::Symbol
     pps::Bool
     # Single stage clusters sample, like apiclus1
-    function SurveyDesign(data::AbstractDataFrame; strata::Union{Nothing,Symbol} = nothing, weights::Union{Nothing,Symbol}= nothing, clusters::Union{Nothing, Symbol, Vector{Symbol}} = nothing, popsize::Union{Nothing, Int,Symbol}=nothing) 
+    function SurveyDesign(
+		data::AbstractDataFrame;
+        strata::Union{Nothing, Symbol}=nothing,
+        clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing,
+        weights::Union{Nothing, Symbol}=nothing,
+        popsize::Union{Nothing, Int, Symbol}=nothing
+    )
         # sampsize here is number of clusters completely sampled, popsize is total clusters in population
-        if typeof(strata) <:Nothing
+        if typeof(strata) <: Nothing
             data.false_strata = repeat(["FALSE_STRATA"], nrow(data))
             strata = :false_strata
         end
@@ -71,7 +74,7 @@ struct SurveyDesign <: AbstractSurveyDesign
         end
         # For one-stage sample only one sampsize vector
         sampsize_labels = :sampsize
-        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])),(nrow(data),))
+        data[!, sampsize_labels] = fill(length(unique(data[!, cluster])), (nrow(data),))
         if !(typeof(popsize) <: Nothing)
             data[!, :weights] = data[!, popsize] ./ data[!, sampsize_labels]
         elseif !(typeof(weights) <: Nothing)
@@ -91,24 +94,26 @@ struct SurveyDesign <: AbstractSurveyDesign
 end
 
 """
-```jldoctest
-julia> apiclus1 = load_data("apiclus1");
+    ReplicateDesign <: AbstractSurveyDesign
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)); # Correct api mistake for pw column
+Survey design obtained by replicating an original design using [`bootweights`](@ref).
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apistrat = load_data("apistrat");
 
-julia> bclus1 = Survey.bootweights(dclus1; replicates = 1000)
-Survey.ReplicateDesign:
-data: 183x1046 DataFrame
-cluster: dnum
-design.data[!,design.cluster]: 637, 637, 637, ..., 448
+julia> strat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
+
+julia> bootstrat = bootweights(strat; replicates=1000)
+ReplicateDesign:
+data: 200x1046 DataFrame
+cluster: false_cluster
+design.data[!,design.cluster]: 1, 2, 3, ..., 200
 popsize: popsize
-design.data[!,design.popsize]: 9240.0, 9240.0, 9240.0, ..., 9240.0
+design.data[!,design.popsize]: 6190.0, 6190.0, 6190.0, ..., 6190.0
 sampsize: sampsize
-design.data[!,design.sampsize]: 15, 15, 15, ..., 15
-design.data[!,:probs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
-design.data[!,:allprobs]: 0.0198, 0.0198, 0.0198, ..., 0.0198
+design.data[!,design.sampsize]: 200, 200, 200, ..., 200
+design.data[!,:probs]: 0.0226, 0.0226, 0.0226, ..., 0.0662
+design.data[!,:allprobs]: 0.0226, 0.0226, 0.0226, ..., 0.0662
 replicates: 1000
 ```
 """
diff --git a/src/mean.jl b/src/mean.jl
index 2bf8b925..0ef5bb37 100644
--- a/src/mean.jl
+++ b/src/mean.jl
@@ -1,19 +1,27 @@
 """
-```jldoctest
-julia> using Survey, Random, StatsBase; 
+    mean(var, design)
 
-julia> apiclus1 = load_data("apiclus1"); 
+Compute the estimated mean of one or more variables within a survey design.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000)
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> mean(:api00, bclus1)
+julia> mean(:api00, clus1)
 1×2 DataFrame
- Row │ mean     SE      
-     │ Float64  Float64 
+ Row │ mean     SE
+     │ Float64  Float64
 ─────┼──────────────────
-   1 │ 644.169  23.7208
+   1 │ 644.169  23.2919
+
+julia> mean([:api00, :enroll], clus1)
+2×3 DataFrame
+ Row │ names   mean     SE
+     │ String  Float64  Float64
+─────┼──────────────────────────
+   1 │ api00   644.169  23.2919
+   2 │ enroll  549.716  45.3655
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
@@ -22,59 +30,39 @@ function mean(x::Symbol, design::ReplicateDesign)
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(mean = X, SE = sqrt(variance))
 end
+
+function mean(x::Vector{Symbol}, design::ReplicateDesign)
+    df = reduce(vcat, [mean(i, design) for i in x])
+    insertcols!(df, 1, :names => String.(x))
+    return df
+end
+
 """
-```jldoctest
-julia> using Survey, Random, StatsBase; 
+    mean(var, domain, design)
 
-julia> apiclus1 = load_data("apiclus1"); 
+Compute the estimated mean within a domain.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000)
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> mean(:api00, :cname, bclus1) |> print
-38×3 DataFrame
- Row │ cname            statistic  SE          
-     │ String15         Float64    Any         
-─────┼─────────────────────────────────────────
-   1 │ Kern               573.6    44.5578
-   2 │ Los Angeles        658.156  22.2058
-   3 │ Orange             749.333  29.5701
-   4 │ San Luis Obispo    739.0    3.37273e-14
-   5 │ San Francisco      558.333  45.6266
-   6 │ Modoc              671.0    0.0
-   7 │ Alameda            676.091  37.3104
-   8 │ Solano             623.0    45.1222
-   9 │ Santa Cruz         624.333  113.43
-  10 │ Monterey           605.0    85.4116
-  11 │ San Bernardino     614.462  30.0066
-  12 │ Riverside          574.3    27.2025
-  13 │ Tulare             664.0    22.0097
-  14 │ San Diego          684.5    32.2241
-  15 │ Sacramento         616.0    39.7877
-  16 │ Marin              799.667  35.2397
-  17 │ Imperial           622.0    0.0
-  18 │ Ventura            743.8    31.7425
-  19 │ San Joaquin        608.667  40.8592
-  20 │ Sonoma             630.0    0.0
-  21 │ Fresno             600.25   56.9173
-  22 │ Santa Clara        718.286  58.562
-  23 │ Sutter             744.0    0.0
-  24 │ Contra Costa       766.111  53.598
-  25 │ Stanislaus         736.333  5.26576
-  26 │ Madera             480.0    3.5861
-  27 │ Placer             759.0    0.0
-  28 │ Lassen             752.0    0.0
-  29 │ Santa Barbara      728.667  25.8749
-  30 │ San Mateo          617.0    78.1173
-  31 │ Siskiyou           699.0    0.0
-  32 │ Kings              469.5    44.6284
-  33 │ Shasta             754.0    60.5829
-  34 │ Yolo               475.0    0.0
-  35 │ Calaveras          790.0    0.0
-  36 │ Napa               727.0    50.5542
-  37 │ Lake               804.0    0.0
-  38 │ Merced             595.0    0
+julia> mean(:api00, :cname, clus1)
+11×3 DataFrame
+ Row │ cname        mean     SE
+     │ String15     Float64  Any
+─────┼───────────────────────────────────
+   1 │ Alameda      669.0    1.27388e-13
+   2 │ Fresno       472.0    1.13687e-13
+   3 │ Kern         452.5    0.0
+   4 │ Los Angeles  647.267  47.4938
+   5 │ Mendocino    623.25   1.0931e-13
+   6 │ Merced       519.25   4.57038e-15
+   7 │ Orange       710.563  2.19684e-13
+   8 │ Plumas       709.556  1.27773e-13
+   9 │ San Diego    659.436  2.63446
+  10 │ San Joaquin  551.189  2.17471e-13
+  11 │ Santa Clara  732.077  56.2584
 ```
 """
 function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
@@ -82,10 +70,4 @@ function mean(x::Symbol, domain::Symbol, design::ReplicateDesign)
     df = bydomain(x, domain, design, weighted_mean)
     rename!(df, :statistic => :mean)
     return df
-end
-
-function mean(x::Vector{Symbol}, design::ReplicateDesign)
-    df = reduce(vcat, [mean(i, design) for i in x])
-    insertcols!(df, 1, :names => String.(x))
-    return df
 end
\ No newline at end of file
diff --git a/src/total.jl b/src/total.jl
index fdf83216..e5fbbdcb 100644
--- a/src/total.jl
+++ b/src/total.jl
@@ -1,21 +1,27 @@
 """
-```jldoctest
-julia> using Survey; 
-
-julia> apiclus1 = load_data("apiclus1"); 
+    total(var, design)
 
-julia> apiclus1[!, :pw] = fill(757/15,(size(apiclus1_original,1),)) # Correct api mistake for pw column
+Compute the estimated population total for one or more variables within a survey design.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000); 
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> total(:api00, bclus1)
+julia> total(:api00, clus1)
 1×2 DataFrame
- Row │ total      SE        
-     │ Float64    Float64   
+ Row │ total      SE
+     │ Float64    Float64
 ─────┼──────────────────────
-   1 │ 5.94916e6  1.31977e6
+   1 │ 3.98999e6  9.22175e5
+
+julia> total([:api00, :enroll], clus1)
+2×3 DataFrame
+ Row │ names   total      SE
+     │ String  Float64    Float64
+─────┼──────────────────────────────
+   1 │ api00   3.98999e6  9.22175e5
+   2 │ enroll  3.40494e6  9.51557e5
 ```
 """
 function total(x::Symbol, design::ReplicateDesign)
@@ -24,41 +30,42 @@ function total(x::Symbol, design::ReplicateDesign)
     variance = sum((Xt .- X).^2) / design.replicates
     DataFrame(total = X, SE = sqrt(variance))
 end
+
+function total(x::Vector{Symbol}, design::ReplicateDesign)
+    df = reduce(vcat, [total(i, design) for i in x])
+    insertcols!(df, 1, :names => String.(x))
+    return df
+end
+
 """
-```jldoctest
-julia> using Survey; 
+    total(var, domain, design)
 
-julia> apiclus1 = load_data("apiclus1"); 
+Compute the estimated population total within a domain.
 
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); 
+```jldoctest
+julia> apiclus1 = load_data("apiclus1");
 
-julia> bclus1 = bootweights(dclus1; replicates = 1000); 
+julia> clus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) |> bootweights;
 
-julia> total(:api00, :cname, bclus1) |> print
+julia> total(:api00, :cname, clus1)
 11×3 DataFrame
- Row │ cname        statistic      SE        
-     │ String15     Float64        Any       
-─────┼───────────────────────────────────────
-   1 │ Alameda          3.71384e5  3.78375e5
-   2 │ Fresno       95281.1        96134.8
-   3 │ Kern         45672.3        43544.7
-   4 │ Los Angeles      4.89981e5  4.42865e5
-   5 │ Mendocino        1.25813e5  1.22757e5
-   6 │ Merced           1.04819e5  1.09032e5
-   7 │ Orange           5.73756e5  6.01213e5
-   8 │ Plumas           3.2228e5   3.26443e5
-   9 │ San Diego        1.83038e6  1.34155e6
-  10 │ San Joaquin      1.02922e6  1.04048e6
-  11 │ Santa Clara      9.60583e5  643492.0
+ Row │ cname        total           SE
+     │ String15     Float64         Any
+─────┼────────────────────────────────────────
+   1 │ Alameda      249080.0        2.48842e5
+   2 │ Fresno        63903.1        64452.2
+   3 │ Kern          30631.5        31083.0
+   4 │ Los Angeles       3.2862e5   2.93649e5
+   5 │ Mendocino     84380.6        83154.4
+   6 │ Merced        70300.2        69272.5
+   7 │ Orange            3.84807e5  3.90097e5
+   8 │ Plumas            2.16147e5  2.17811e5
+   9 │ San Diego         1.2276e6   8.78559e5
+  10 │ San Joaquin       6.90276e5  6.90685e5
+  11 │ Santa Clara       6.44244e5  4.09943e5
 ```
 """
 function total(x::Symbol, domain::Symbol, design::ReplicateDesign)
     df = bydomain(x, domain, design, wsum)
     rename!(df, :statistic => :total)
-end
-
-function total(x::Vector{Symbol}, design::ReplicateDesign)
-    df = reduce(vcat, [total(i, design) for i in x])
-    insertcols!(df, 1, :names => String.(x))
-    return df
 end
\ No newline at end of file

From 76275cd66b93f0b5794d8a8638023c2e2857da01 Mon Sep 17 00:00:00 2001
From: Iulia Dumitru <iuliadmtru@gmail.com>
Date: Tue, 10 Jan 2023 17:26:11 +0530
Subject: [PATCH 11/11] Fix argument rendering in docstring

---
 src/SurveyDesign.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl
index 07b9e1de..69657cd0 100644
--- a/src/SurveyDesign.jl
+++ b/src/SurveyDesign.jl
@@ -18,12 +18,14 @@ General survey design encompassing a simple random, stratified, cluster or multi
 In the case of cluster sample, the clusters are chosen by simple random sampling. All
 individuals in one cluster are sampled. The clusters are considered disjoint and nested.
 
+`strata` and `clusters` must be given as columns in `data`.
+
 # Arguments:
-`data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
-`strata::Union{Nothing, Symbol}=nothing`: the stratification variable - must be given as a column in `data`.
-`clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable - must be given as column(s) in `data`.
-`weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
-`popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
+- `data::AbstractDataFrame`: the survey dataset (!this gets modified by the constructor).
+- `strata::Union{Nothing, Symbol}=nothing`: the stratification variable.
+- `clusters::Union{Nothing, Symbol, Vector{Symbol}}=nothing`: the clustering variable.
+- `weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
+- `popsize::Union{Nothing, Int, Symbol}=nothing`: the (expected) survey population size.
 
 ```jldoctest
 julia> apiclus1 = load_data("apiclus1");