-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Poor pooled estimates in simple case #15
Labels
Comments
I also encountered a case where densities do not integrate to 1 when I fit a univariate density. # bimodal data
# Simulate bimodal data with simcausal:
#   W1 ~ Bernoulli(0.5); sA | W1 ~ Normal(4*W1 - 2, 1),
# so marginally sA is an equal mixture of N(-2, 1) and N(2, 1).
library("simcausal")
D <- DAG.empty()
D <-
  D + node("W1", distr = "rbern", prob = 0.5) +
  node("sA.mu", distr = "rconst", const = 4*W1 - 2) +
  node("sA", distr = "rnorm", mean = sA.mu, sd = 1)
D <- set.DAG(D, n.test = 10)
# Fixed seed so all four tests below see the same draw of 10,000 observations.
datO <- sim(D, n = 1e4, rndseed = 12345)
library("condensier")
library("sl3")
# Quoted for consistency with the other library() calls above.
library("origami")
# Constant "dummy" covariate: the conditional density of x given dummy is the
# marginal density of sA, which is what the tests try to recover.
dat_dummy <- data.frame(dummy = 1, x = datO$sA)
#
# ---------------------------------------------------------------------------------------
# TEST 1: good fit
# pool = FALSE, glm (speedglm) bin estimator
# ---------------------------------------------------------------------------------------
# One sl3 task over the dummy-covariate data, with 3-fold CV.
task <- sl3_Task$new(
  dat_dummy,
  covariates = c("dummy"),
  outcome = "x",
  folds = origami::make_folds(n = length(dat_dummy$x), V = 3)
)
# Candidate learners differ only in the number of equal-mass bins.
lrn_list <- lapply(c(20, 100, 200), function(k) {
  Lrnr_condensier$new(
    nbins = k, bin_method = "equal.mass", pool = FALSE,
    bin_estimator = speedglmR6$new()
  )
})
# Cross-validate the stacked candidates and score them with squared-error loss.
cv_fit <- Lrnr_cv$new(do.call(Stack$new, lrn_list))$train(task)
risks <- cv_fit$cv_risk(loss_squared_error)
# Refit the CV-selected candidate on the full data.
best_fit <- lrn_list[[which.min(risks)]]$train(task)
p_hat <- empiricalDensity$new(best_fit$predict()[[1]], dat_dummy$x)
# True marginal density: equal mixture of N(2, 1) and N(-2, 1).
foo2 <- function(x) 0.5 * dnorm(x, mean = 2) + 0.5 * dnorm(x, mean = -2)
p_hat$display(foo2)
#
# ---------------------------------------------------------------------------------------
# TEST 2: bad fit
# pool = TRUE, xgboost bin estimator
# The fitted pdf is not normalized.
# ---------------------------------------------------------------------------------------
task <- sl3_Task$new(
  dat_dummy,
  covariates = c("dummy"),
  outcome = "x",
  folds = origami::make_folds(n = length(dat_dummy$x), V = 3)
)
# Same xgboost bin estimator for each candidate; only nbins varies.
make_learner <- function(k) {
  Lrnr_condensier$new(
    nbins = k, bin_method = "equal.mass", pool = TRUE,
    bin_estimator = Lrnr_xgboost$new(nrounds = 50, objective = "reg:logistic")
  )
}
lrn_list <- lapply(c(20, 100, 200), make_learner)
# Cross-validate the stack, then refit the lowest-risk candidate on all data.
cv_fit <- Lrnr_cv$new(do.call(Stack$new, lrn_list))$train(task)
risks <- cv_fit$cv_risk(loss_squared_error)
best_fit <- lrn_list[[which.min(risks)]]$train(task)
p_hat <- empiricalDensity$new(best_fit$predict()[[1]], dat_dummy$x)
# True marginal density: equal mixture of N(2, 1) and N(-2, 1).
foo2 <- function(x) 0.5 * dnorm(x, mean = 2) + 0.5 * dnorm(x, mean = -2)
p_hat$display(foo2)
# p_hat$normalize()
# p_hat$display(foo2)
#
# ---------------------------------------------------------------------------------------
# TEST 3: good fit
# pool = FALSE, xgboost bin estimator
# ---------------------------------------------------------------------------------------
task <- sl3_Task$new(
  dat_dummy,
  covariates = c("dummy"),
  outcome = "x",
  folds = origami::make_folds(n = length(dat_dummy$x), V = 3)
)
# Build the three candidates (20 / 100 / 200 equal-mass bins), unpooled.
candidates <- lapply(c(20, 100, 200), function(nb) {
  Lrnr_condensier$new(
    nbins = nb, bin_method = "equal.mass", pool = FALSE,
    bin_estimator = Lrnr_xgboost$new(nrounds = 50, objective = "reg:logistic")
  )
})
lrn_list <- candidates
# CV-score the stack under squared-error loss, then refit the winner.
cv_stack <- Lrnr_cv$new(do.call(Stack$new, lrn_list))
cv_fit <- cv_stack$train(task)
risks <- cv_fit$cv_risk(loss_squared_error)
best_fit <- lrn_list[[which.min(risks)]]$train(task)
p_hat <- empiricalDensity$new(best_fit$predict()[[1]], dat_dummy$x)
# True marginal density: equal mixture of N(2, 1) and N(-2, 1).
foo2 <- function(x) 0.5 * dnorm(x, mean = 2) + 0.5 * dnorm(x, mean = -2)
p_hat$display(foo2)
#
# ---------------------------------------------------------------------------------------
# TEST 4: bad fit
# pool = TRUE, glm (speedglm) bin estimator
# Bad because glm cannot smooth across the pooled bins.
# ---------------------------------------------------------------------------------------
task <- sl3_Task$new(
  dat_dummy,
  covariates = c("dummy"),
  outcome = "x",
  folds = origami::make_folds(n = length(dat_dummy$x), V = 3)
)
# Pooled speedglm candidates over an increasing number of equal-mass bins.
lrn_list <- lapply(c(20, 100, 200), function(k) {
  Lrnr_condensier$new(
    nbins = k, bin_method = "equal.mass", pool = TRUE,
    bin_estimator = speedglmR6$new()
  )
})
# Cross-validate the stacked learners; pick and refit the lowest-risk one.
cv_fit <- Lrnr_cv$new(do.call(Stack$new, lrn_list))$train(task)
risks <- cv_fit$cv_risk(loss_squared_error)
best_fit <- lrn_list[[which.min(risks)]]$train(task)
p_hat <- empiricalDensity$new(best_fit$predict()[[1]], dat_dummy$x)
# True marginal density: equal mixture of N(2, 1) and N(-2, 1).
foo2 <- function(x) 0.5 * dnorm(x, mean = 2) + 0.5 * dnorm(x, mean = -2)
p_hat$display(foo2)
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The following code attempts to fit a marginal density using both pooled and unpooled
condensier
estimates by way of sl3
. The true density is a bimodal normal mixture. The unpooled estimates (red) approximate the true density (blue), but the pooled estimates (green) do not. The text was updated successfully, but these errors were encountered: