From 9e7a2aeb1f36551a758ae673f5a1461808cd0a7d Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Tue, 6 Dec 2022 10:05:47 +0200 Subject: [PATCH 1/3] Remove old design from tests --- test/dimnames.jl | 18 +++++------------- test/quantile.jl | 12 +++--------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/test/dimnames.jl b/test/dimnames.jl index 93817380..6241bd64 100644 --- a/test/dimnames.jl +++ b/test/dimnames.jl @@ -3,22 +3,14 @@ apisrs = load_data("apisrs") # make a copy to not modify the original dataset apisrs_copy = copy(apisrs) - srs_new = SimpleRandomSample(apisrs_copy,popsize=:fpc,ignorefpc = true) - # make a new copy to use for the old design - apisrs_copy = copy(apisrs) - srs_old = design(id = :1, data = apisrs) + srs = SimpleRandomSample(apisrs_copy,popsize=:fpc,ignorefpc = true) # `dim` - @test dim(srs_new)[1] == dim(srs_old)[1] - @test dim(srs_new)[2] == 42 - @test dim(srs_old)[2] == 45 + @test dim(srs)[2] == 42 # `colnames` - @test length(colnames(srs_new)) == dim(srs_new)[2] - @test length(colnames(srs_old)) == dim(srs_old)[2] + @test length(colnames(srs)) == dim(srs)[2] # `dimnames` - @test length(dimnames(srs_new)[1]) == parse(Int, last(dimnames(srs_new)[1])) - @test dimnames(srs_new)[2] == colnames(srs_new) - @test length(dimnames(srs_old)[1]) == parse(Int, last(dimnames(srs_old)[1])) - @test dimnames(srs_old)[2] == colnames(srs_old) + @test length(dimnames(srs)[1]) == parse(Int, last(dimnames(srs)[1])) + @test dimnames(srs)[2] == colnames(srs) # Stratified sampling tests end diff --git a/test/quantile.jl b/test/quantile.jl index 51c79007..3dcedfbb 100644 --- a/test/quantile.jl +++ b/test/quantile.jl @@ -1,17 +1,11 @@ @testset "quantile.jl" begin # SimpleRandomSample - apisrs = load_data("apisrs") + apisrs_original = load_data("apisrs") - srs_new = SimpleRandomSample(apisrs,popsize=:fpc,ignorefpc = true) - srs_old = design(id = :1, data = apisrs) + apisrs = copy(apisrs_original) + srs_new = SimpleRandomSample(apisrs; popsize=:fpc, ignorefpc=true) # 0.5th percentile - q_05_new = quantile(:api00, srs_new, 0.5) - q_05_old = quantile(:api00, srs_old, 0.5) - @test q_05_new == q_05_old # 0.25th percentile - q_025_new = quantile(:api00, srs_new, 0.25) - q_025_old = quantile(:api00, srs_old, 0.25) - @test q_025_new == q_025_old # StratifiedSample end From 6632f7d065e5354873670656b66adc05e454a241 Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Tue, 6 Dec 2022 10:08:45 +0200 Subject: [PATCH 2/3] Remove old design methods --- src/boxplot.jl | 8 -------- src/dimnames.jl | 3 --- src/hist.jl | 18 ------------------ src/plot.jl | 4 ---- src/quantile.jl | 9 --------- src/show.jl | 16 ---------------- 6 files changed, 58 deletions(-) diff --git a/src/boxplot.jl b/src/boxplot.jl index aa44b9f5..46f6958a 100644 --- a/src/boxplot.jl +++ b/src/boxplot.jl @@ -23,11 +23,3 @@ function boxplot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...) data * visual(BoxPlot) * map |> draw end - -function boxplot(design::design, x::Symbol, y::Symbol; kwargs...) - # TODO: change function, make it a wrapper - map = mapping(x, y; kwargs...) - data = AlgebraOfGraphics.data(design.variables) - - data * visual(BoxPlot) * map |> draw -end diff --git a/src/dimnames.jl b/src/dimnames.jl index 71c9ad2b..91bd473e 100644 --- a/src/dimnames.jl +++ b/src/dimnames.jl @@ -13,7 +13,6 @@ julia> dim(srs) ``` """ dim(design::AbstractSurveyDesign) = size(design.data) -dim(design::design) = size(design.variables) """ colnames(design) @@ -50,7 +49,6 @@ julia> colnames(srs) ``` """ colnames(design::AbstractSurveyDesign) = names(design.data) -colnames(design::design) = names(design.variables) """ dimnames(design) @@ -69,4 +67,3 @@ julia> dimnames(srs) ``` """ dimnames(design::AbstractSurveyDesign) = [string.(1:size(design.data, 1)), names(design.data)] -dimnames(design::design) = [string.(1:size(design.variables, 1)), names(design.variables)] diff --git a/src/hist.jl b/src/hist.jl index 93c7d2a8..c140e59e 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -1,7 +1,6 @@ sturges(n::Integer) = ceil(Int, log2(n)) + 1 sturges(vec::AbstractVector) = ceil(Int, log2(length(vec))) + 1 sturges(df::DataFrame, var::Symbol) = ceil(Int, log2(size(df[!, var], 1))) + 1 -sturges(design::design, var::Symbol) = sturges(design.variables, var) """ sturges(design::SurveyDesign, var::Symbol) @@ -22,7 +21,6 @@ sturges(design::AbstractSurveyDesign, var::Symbol) = sturges(design.data, var) freedman_diaconis(v::AbstractVector) = round(Int, length(v)^(1 / 3) * (maximum(v) - minimum(v)) / (2 * iqr(v))) freedman_diaconis(df::DataFrame, var::Symbol) = freedman_diaconis(df[!, var]) -freedman_diaconis(design::design, var::Symbol) = freedman_diaconis(design.variables[!, var]) """ freedman_diaconis(design::SurveyDesign, var::Symbol) @@ -85,19 +83,3 @@ function hist(design::AbstractSurveyDesign, var::Symbol, ) hist(design, var, bins(design, var); kwargs...) end - -function hist(design::design, var::Symbol, - bins::Union{Integer, AbstractVector} = freedman_diaconis(design, var); - normalization = :density, - kwargs... - ) - hist = histogram(bins = bins, normalization = normalization, kwargs...) - data(design.variables) * mapping(var, weights = :weights) * hist |> draw -end - -function hist(design::design, var::Symbol, - bins::Function; - kwargs... - ) - hist(design, var, bins(design, var); kwargs...) -end diff --git a/src/plot.jl b/src/plot.jl index 74875c6e..cb9792d1 100644 --- a/src/plot.jl +++ b/src/plot.jl @@ -18,7 +18,3 @@ save("scatter.png", s); nothing # hide function plot(design::AbstractSurveyDesign, x::Symbol, y::Symbol; kwargs...) data(design.data) * mapping(x, y, markersize = :weights) * visual(Scatter, marker = '○') |> draw end - -function plot(design::design, x::Symbol, y::Symbol; kwargs...) - data(design.variables) * mapping(x, y, markersize = :weights) * visual(Scatter, marker = '○') |> draw -end diff --git a/src/quantile.jl b/src/quantile.jl index 354e95b4..f2134e9d 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -30,15 +30,6 @@ function quantile(var, design::StratifiedSample, q) return df end -function quantile(var, design::design, q) - x = design.variables[!, var] - w = design.variables.probs - df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q)) - rename!(df, :tmp => Symbol(string(q) .* "th percentile")) - - return df -end - # Inner method for `by` function quantile(x, w, _, q) df = DataFrame(tmp = Statistics.quantile(Float32.(x), weights(w), q)) diff --git a/src/show.jl b/src/show.jl index 9b5c234a..119dd14f 100644 --- a/src/show.jl +++ b/src/show.jl @@ -64,19 +64,3 @@ function Base.show(io::IO, ::MIME"text/plain", design::SurveyDesign) printinfo(io, "sampfraction", makeshort(design.sampfraction)) printinfo(io, "ignorefpc", string(design.ignorefpc); newline=false) end - -"Print information about a survey design initialized using `design`." -function Base.show(io::IO, ::MIME"text/plain", design::design) - printstyled(io, "Survey Design:\n"; bold=true) - printstyled(io, "variables: "; bold=true) - println(io, size(design.variables, 1), "x", size(design.variables, 2), " DataFrame") - printinfo(io, "id", makeshort(design.id)) - printinfo(io, "strata", makeshort(design.variables.strata)) - printinfo(io, "probs", makeshort(design.variables.probs)) - printinfo(io, "fpc:\n popsize", makeshort(design.variables.popsize)) - printinfo(io, " sampsize", makeshort(design.variables.sampsize); newline=false) - printstyled("\nnest: "; bold=true) - print(design.nest) - printstyled("\ncheck_strat: "; bold=true) - print(design.check_strat) -end From 0c322054698d1fee650e0c1e2ac6e9892202737b Mon Sep 17 00:00:00 2001 From: Iulia Dumitru Date: Tue, 6 Dec 2022 11:06:30 +0200 Subject: [PATCH 3/3] Remove old design --- src/Survey.jl | 2 -- src/design.jl | 79 --------------------------------------------------- 2 files changed, 81 deletions(-) delete mode 100644 src/design.jl diff --git a/src/Survey.jl b/src/Survey.jl index 5150ca45..331eaa31 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -11,7 +11,6 @@ using AlgebraOfGraphics using CategoricalArrays include("SurveyDesign.jl") -include("design.jl") include("mean.jl") include("quantile.jl") include("total.jl") @@ -25,7 +24,6 @@ include("show.jl") export load_data export AbstractSurveyDesign, SimpleRandomSample, StratifiedSample export SurveyDesign -export design export by export ht_calc export dim, colnames, dimnames diff --git a/src/design.jl b/src/design.jl deleted file mode 100644 index 6a50e913..00000000 --- a/src/design.jl +++ /dev/null @@ -1,79 +0,0 @@ -""" - design - -Type incorporating all necessary information to describe a survey design. -``` -""" -struct design - id - variables::DataFrame - nest::Bool - check_strat::Bool -end - -function get_weights(data, wt::Vector) - if nrow(data) == length(wt) - return wt - else - @error "length of the weights vector is not equal to the number of rows in the dataset" - end -end - -function get_weights(data, wt::Nothing) - return ones(nrow(data)) -end - -function get_weights(data, wt::Symbol) - wt = data[!, String(wt)] -end - -function get_probs(data, wt, probs::Symbol) - return data[!, String(probs)] -end - -function get_probs(data, wt, probs::Nothing) - return 1 ./ wt -end - -function get_fpc(data, fpc::Symbol) - return data[!, String(fpc)] -end - -function get_fpc(data, fpc::Nothing) - return repeat([nothing], nrow(data)) -end - -function get_fpc(data, fpc::Vector) - return fpc -end - -function get_fpc(data, fpc::Real) - return repeat([fpc], nrow(data)) -end - -function get_strata(data, strata::Symbol) - return data[!, String(strata)] -end - -function get_strata(data, strata::Vector) - return strata -end - -function get_strata(data, strata::Nothing) - return repeat([1], nrow(data)) -end - -function design(; data = DataFrame(), id = Symbol(), probs = nothing, strata = nothing, fpc = nothing, nest = false, check_strat = !nest, weights = nothing) - wt = get_weights(data, weights) - if isnothing(probs) & isnothing(weights) - # THIS WARNING IS NOT NECESSARY - @warn "No weights or probabilities supplied, assuming equal probability" - end - df = data - df.probs = ProbabilityWeights(get_probs(data, wt, probs)) - df.weights = FrequencyWeights(get_weights(data, weights)) - df.popsize = get_fpc(data, fpc) - df.sampsize = repeat([nrow(data)], nrow(data)) - df.strata = get_strata(data, strata) - return design(id, df, nest, check_strat) -end