Skip to content

Commit

Permalink
Shikhar manual testing code
Browse files Browse the repository at this point in the history
  • Loading branch information
smishr committed Sep 30, 2022
1 parent 13c829f commit e93ae7a
Show file tree
Hide file tree
Showing 2 changed files with 269 additions and 0 deletions.
168 changes: 168 additions & 0 deletions clean_examples.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# ### Lumley Textbook code, Fig 2.2 pg 20
using Revise
using Survey
using DataFrames
using CSV

# Read assets/apisrs.csv (path relative to the working directory) into a DataFrame
apisrs = CSV.read("assets/apisrs.csv", DataFrame)

### Set design (all four constructions should give identical results)
srs_design = SimpleRandomSample(apisrs, popsize = apisrs.fpc)    # popsize only
srs_design = SimpleRandomSample(apisrs, weights = apisrs.pw)     # no popsize, so weights given as Vector
srs_design = SimpleRandomSample(apisrs, weights = :pw)           # no popsize, so weights given as Symbol
srs_design = SimpleRandomSample(apisrs, probs = 1 ./ apisrs.pw)  # no popsize, so probs given as Vector

svytotal(:enroll, srs_design)
svymean([:enroll, :api00], srs_design)
svymean(:enroll, srs_design)

# svytotal error
# The call below previously received `srs`, a name this script never defines;
# it now uses the design constructed above.
svytotal(:api00, srs_design)

# No fpc example
no_fpc = SimpleRandomSample(apisrs, ignorefpc = true)
svytotal(:enroll, no_fpc)
svytotal(:api00, no_fpc)
svymean(:enroll, no_fpc)

#### Categorical-variable experiments on the SRS data
using Revise
using Survey
using DataFrames
using CSV
using CategoricalArrays

# Re-read the data and look at the element type of `stype` before conversion
apisrs_categ = CSV.read("assets/apisrs.csv", DataFrame)
eltype(apisrs_categ.stype)
# Swap the column for a CategoricalArray and look at the element type again
apisrs_categ.stype = CategoricalArray(apisrs_categ.stype)
eltype(apisrs_categ.stype)

srs_design_categ = SimpleRandomSample(apisrs_categ; popsize = apisrs_categ.fpc)

# isa(srs_design_categ.data.stype, CategoricalArray)
# isa(srs_design_categ.data[!,:stype], CategoricalArray)

# svymean / svytotal on numeric and categorical columns
svymean(:enroll, srs_design_categ)  # works
svymean(:stype, srs_design_categ)   # no method matching /(::CategoricalValue{String1, UInt32}, ::Int64)
svytotal(:stype, srs_design_categ)

# Adding a derived column to a design's data (uses `srs_design` from the section above)
srs_design.data.apidiff = srs_design.data.api00 - srs_design.data.api99


svyquantile(:enroll, srs_design_categ, 0.5)

# isa(srs_design_categ.data.stype, CategoricalArray)


# # apisrs = DataFrame(CSV.file("data/apisrs.csv"))
# # Base.format_bytes(Base.summarysize(apisrs.stype))
# # Base.format_bytes(Base.summarysize(CategoricalArray(apisrs.stype)))


# ### Test 10.09.22

# gdf = groupby(design.data, by)
# combine(gdf, [formula, :weights] => ((a, b) -> func(a, design, b, params...)) => AsTable)

# using Revise
# using Survey
# using DataFrames
# using CSV
# using StatsBase

# apisrs_categ = CSV.read("assets/apisrs.csv",DataFrame) # load data
# srs_design = SimpleRandomSample(apisrs_categ, popsize = apisrs_categ.fpc) # create design object
# # manually groupby to get result
# gdf = groupby(srs_design.data, :cname )
# combine(gdf, :api00 => mean) # works
# combine(gdf, (:api00,srs_design) => svymean)

# combine(gdf, [:api00, :pw] => ((a, b) -> svymean(a, srs_design, b)) => AsTable)

# Test 12.09.22 -- per-group svymean through DataFrames' combine
using Revise
using Survey
using DataFrames
using CSV
using StatsBase

apisrs_categ = CSV.read("assets/apisrs.csv", DataFrame)                    # load data
srs_design = SimpleRandomSample(apisrs_categ; popsize = apisrs_categ.fpc)  # create design object

# Group the design's data by `cname`, then hand each group's `api00` values and
# `pw` weights to svymean together with the full design object.
gdf = groupby(srs_design.data, :cname)
combine(gdf, [:api00, :pw] => ((vals, wts) -> svymean(vals, srs_design, wts)) => AsTable)




# # print("Yolo")
# test = combine(gdf, x => mean => :mean) # |> DataFrame |> AsTable # , (x , design) => sem => :sem ) |> DataFrame
# @show test
# # show(test)
# # delay(50000)
# return 0

## 21.09.22 Stratified test 1
# Stratifying is best done on a CategoricalArray column; otherwise convert the
# StringX column to categorical values before constructing the StratifiedSample.
using Revise
using Survey
using DataFrames
using CSV
using StatsBase
using CategoricalArrays

apistrat_categ = CSV.read("assets/apistrat.csv", DataFrame)  # load data
apistrat_categ.stype = CategoricalArray(apistrat_categ.stype)
eltype(apistrat_categ.stype)

# Stratify on the (now categorical) `stype` column, population sizes from `fpc`
strat_categ_design = StratifiedSample(apistrat_categ, :stype; popsize = apistrat_categ.fpc)
svymean(:stype, strat_categ_design)
svytotal(:stype, strat_categ_design)

### Strat normal (stratification column used as read from the CSV)
using Revise
using Survey
using DataFrames
using CSV
using StatsBase

apistrat = CSV.read("assets/apistrat.csv", DataFrame)  # load data
strat_design = StratifiedSample(apistrat, :stype; popsize = apistrat.fpc)
svytotal(:api00, strat_design)
svymean(:api00, strat_design)

svytotal(:enroll, strat_design)
svymean(:enroll, strat_design)

# Support for categorical var

# Test feature for categorical variables
# (`apisrs_categ` is assigned in an earlier section of this script)


srs_design_categ = SimpleRandomSample(apisrs_categ; popsize = apisrs_categ.fpc)

# V̂ȳₕ = Nₕ .^2 ./ nₕ .* (1 .- fₕ) .* s²ₕ
# V̂Ȳ̂ = 1 ./ sum(Nₕ) .* sum( Nₕ .^2 .* V̂ȳₕ) #(Nₕ .^ 2) .* design.fpc .* s²h ./ design.sampsize # sum(combine(gdf, [x,:weights] => ( (a,b) -> wsum(a,b) ) => :total).total)


# Stratified design specified through a weights column rather than popsize
StratifiedSample(apistrat, :stype; weights = :pw)


## 26.09.22 HT test
using Revise
using Survey
using DataFrames
using CSV

# Read the SRS data set
apisrs = CSV.read("assets/apisrs.csv", DataFrame)

### Design built from popsize only
srs_design = SimpleRandomSample(apisrs; popsize = apisrs.fpc)

ht_calc(:api00, srs_design)


# `strat_design` is the stratified design assigned in the "Strat normal" section
ht_calc(:api00, strat_design)
101 changes: 101 additions & 0 deletions shikharTests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
## Shikhar added test 24.08.22
# Build a simple random sample design from the `pw` weights and take a survey mean.
using Revise;
using Survey;
apisrs = load_data("apisrs");
srs = SimpleRandomSample(apisrs; weights = apisrs.pw);
svymean(:enroll, srs)

# Test without fpc
using Revise;
using Survey;
apisrs_nofpc = load_data("apisrs");
# Take the weights from the same frame the design is built on. The previous
# version read them from `apisrs`, a separate variable assigned in the section
# above, so this section only ran when that variable happened to exist.
srs = SimpleRandomSample(apisrs_nofpc, weights = apisrs_nofpc.pw, ignorefpc = true);
svytotal(:enroll, srs)

# svytotal on a numeric column, then on the `stype` factor-like column
using Revise;
using Survey;
using DataFrames;
apisrs = load_data("apisrs");
srs = SimpleRandomSample(apisrs, weights = apisrs.pw );
svytotal(:enroll, srs)

srs_design = SimpleRandomSample(apisrs, weights = apisrs.pw );
# Use the design created on the line above; the previous version passed `srs`,
# leaving `srs_design` assigned but unused.
factor_variable_test = svytotal(:stype, srs_design)

##########
using Survey
srs_design = SimpleRandomSample(apisrs, weights = apisrs.pw )


"""
    @svypipe design args...

Placeholder for a piping macro over survey designs. The body is not implemented
yet, so every invocation currently expands to `nothing` and the argument
expressions are never evaluated.

# Arguments
- `design`: the first expression passed to the macro.
- `args...`: any further expressions.

Macro arguments are received as unevaluated syntax (`Symbol`, `Expr` or a
literal), never as runtime values. A type annotation such as
`design::AbstractSurveyDesign` therefore can never match and makes every call
fail at macro expansion, so the parameters are left untyped.
"""
macro svypipe(design, args...)
    # Some definitions
    return nothing
end
# Exploration of piping syntaxes for grouped summaries, followed by svyby sketches.
# The whole pipeline below is passed to `@svypipe` as one unevaluated expression.
@svypipe design |> groupby(:country) |> mean(:height)

using StatsBase
# NOTE(review): `x` is first assigned further down (the `x = DataFrame(...)` line),
# so this call only works if `x` already exists in the session.
combine(groupby(x, :country) , :height => mean)

# Works
# NOTE(review): no `using` visible in this file provides `@pipe` -- presumably
# Pipe.jl; confirm it is loaded before running.
@pipe x |> groupby(_, :country) |> combine(_, :height => mean)
# Doesn't work (here `groupby` is written without the `_` placeholder)
@pipe x |> groupby(:country) |> combine(_, :height => mean)

using Lazy
import DataFrames.groupby
# Lazy's `@>` form of the same group-then-combine pipeline
@> x groupby(:country) combine(:height => mean)




### Test svyby
# Uses `srs` and `apisrs` assigned in the earlier sections of this file.
svyby(:api00,:cname, srs, svymean )
groupby(apisrs,:cname)
combine(groupby(apisrs,:cname) , :api00 => mean)
combine(groupby(apisrs,:cname) , :api00 => svymean => AsTable)




# Toy data frame used by the piping experiments above
x = DataFrame(country = [1,2,3,4,4], height = [10,20,30,40,20])

# NOTE(review): `srs_desing` looks like a typo for `srs_design`, and `enroll` /
# `col1` are not defined anywhere in the visible file -- this reads as an API
# sketch rather than runnable code; confirm the intent before fixing.
svyby(srs_desing, [enroll,] , summarise = mean, col = col1)

# NOTE(review): `enroll` is not defined in the visible file; presumably `:enroll` was meant.
(srs_design, enroll)

# function |> (design::AbstractSurveyDesign ; func)
# design.data |> func(...)
# end



### 5.09.22 Cleaned up tests
# Minimal check: load the data, build a weighted SRS design, take a survey mean.
using Revise;
using Survey;
apisrs = load_data("apisrs");
srs = SimpleRandomSample(apisrs; weights = apisrs.pw);
svymean(:enroll, srs)


# New issue:
# Add CategoricalArrays ("Factor") support, multiple dispatch
# Add multiple dispatch methods for `CategoricalArray` type columns in the dataset

# • Intelligent parsing of `StringX` columns to be read as CategoricalArrays.
# Eg/ if nunique(col) < len(col)/2

# # If sampling probabilities given then sampling weights is inverse of probs
# if !isnothing(probs)
# weights = 1 ./ probs
# end


# sampsize::Union{Nothing,Vector{<:Real}}
# popsize::Union{Nothing,Vector{<:Real}}
# sampfraction::Vector{<:Real}
# fpc::Vector{<:Real}
# combine(gdf) do sdf
# DataFrame(mean = mean(sdf[!, x], sem = sem(x, design::SimpleRandomSample)))
# end

# if isa(x,Symbol) &&
# return DataFrame(mean = ["Yolo"], sem = ["Yolo"])

1 comment on commit e93ae7a

@iuliadmtru
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These files should not be part of the package.

Please sign in to comment.