diff --git a/DESCRIPTION b/DESCRIPTION index 14d03fd2..9463fe98 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BAS -Version: 1.5.3 -Date: 2018-10-29 +Version: 1.5.4 +Date: 2018-11-05 Title: Bayesian Variable Selection and Model Averaging using Bayesian Adaptive Sampling Authors@R: c(person("Merlise", "Clyde", email="clyde@duke.edu", role=c("aut","cre", "cph"), diff --git a/NEWS.md b/NEWS.md index dd61ba94..bcce3973 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,17 @@ +# BAS 1.5.4 + +## Bug Fixes + + * rounding issues with clang on fedora and solaris with + `force.heredity = TRUE` lead to sampling continuing under + `method='BAS'` and duplicate models so that normalized posterior + probabilities were incorrect. + [issue #38](https://github.com/merliseclyde/BAS/issues/38) + + * FORTRAN errors when data has zero rows + (issue #37)[https://github.com/merliseclyde/BAS/issues/37] + add check and test for n == 0 due to subsetting input data + # BAS 1.5.3 ## Bug Fixes diff --git a/R/bas_glm.R b/R/bas_glm.R index 8d31499b..d588712e 100644 --- a/R/bas_glm.R +++ b/R/bas_glm.R @@ -287,6 +287,8 @@ bas.glm <- function(formula, family = binomial(link = "logit"), p <- dim(X)[2] nobs <- dim(X)[1] + if (nobs == 0) {stop("Sample size is zero; check data andsubset")} + # weights = as.vector(model.weights(mf)) weights <- as.vector(model.weights(mf)) @@ -400,7 +402,7 @@ bas.glm <- function(formula, family = binomial(link = "logit"), #print(MCMC.iterations) - if (is.null(update)) { + if (is.null(update) & !force.heredity) { if (n.models == 2^(p - 1)) { update <- n.models + 1 } else { diff --git a/R/bas_lm.R b/R/bas_lm.R index a7a9a2fb..fd8688d3 100644 --- a/R/bas_lm.R +++ b/R/bas_lm.R @@ -242,8 +242,8 @@ normalize.n.models <- function(n.models, p, initprobs, method, bigmem) { #' See details in Clyde and Ghosh (2012). #' @param force.heredity Logical variable to force all levels of a factor to be #' included together and to include higher order interactions only if lower -#' order terms are included. Currently only supported with `method='MCMC'`. -#' Default is TRUE. +#' order terms are included. Currently only supported with `method='MCMC'` and +#' `method = 'BAS'`; for these methods the default is TRUE. #' @param pivot Logical variable to allow pivoting of columns when obtaining the #' OLS estimates of a model so that models that are not full rank can be fit. Defaults to TRUE. #' Currently coefficients that are not estimable are set to zero. Use caution with @@ -526,6 +526,8 @@ bas.lm <- function(formula, namesx <- dimnames(X)[[2]] namesx[1] <- "Intercept" n <- dim(X)[1] + + if (n == 0) {stop("Sample size is zero; check data andsubset")} weights <- as.vector(model.weights(mf)) if (is.null(weights)) { @@ -649,6 +651,8 @@ bas.lm <- function(formula, if (method == "MCMC+BAS" | method == "deterministic") { force.heredity <- FALSE +# warning("force.heredity not supported with these sampling methods, setting to FALSE. +# Use force.heredity.bas() to post-process and inforce constraints") } # does not work with updating the tree if (force.heredity) { parents <- make.parents.of.interactions(mf, data) @@ -686,8 +690,7 @@ bas.lm <- function(formula, # print(n.models) modelprior <- normalize.modelprior(modelprior, p) - - if (is.null(update)) { + if (is.null(update) & !force.heredity) { if (n.models == 2^(p - 1)) { update <- n.models + 1 } else { @@ -695,6 +698,7 @@ bas.lm <- function(formula, } } + modelindex <- as.list(1:n.models) Yvec <- as.numeric(Y) modeldim <- as.integer(rep(0, n.models)) diff --git a/man/bas.lm.Rd b/man/bas.lm.Rd index 8f5c33fd..c2cb3c45 100644 --- a/man/bas.lm.Rd +++ b/man/bas.lm.Rd @@ -177,7 +177,7 @@ See details in Clyde and Ghosh (2012).} \item{force.heredity}{Logical variable to force all levels of a factor to be included together and to include higher order interactions only if lower order terms are included. Currently only supported with `method='MCMC'`. -Default is TRUE.} +Default is TRUE for MCMC.} \item{pivot}{Logical variable to allow pivoting of columns when obtaining the OLS estimates of a model so that models that are not full rank can be fit. Defaults to TRUE. diff --git a/src/glm_sampleworep.c b/src/glm_sampleworep.c index 9406cbbb..cc0d3b35 100644 --- a/src/glm_sampleworep.c +++ b/src/glm_sampleworep.c @@ -99,7 +99,9 @@ SEXP glm_sampleworep(SEXP Y, SEXP X, SEXP Roffset, SEXP Rweights, int *modelwork= ivecalloc(p); // sample models - for (m = 1; m < k && pigamma[0] < 1.0; m++) { + // add DOUBLE_EPS to fix issue in + // https://github.com/merliseclyde/BAS/issues/38 + for (m = 1; m < k && (pigamma[0] + DOUBLE_EPS) < 1.0; m++) { INTEGER(modeldim)[m] = 0.0; for (i = n; i < p; i++) { INTEGER(modeldim)[m] += model[vars[i].index]; diff --git a/src/lm_sampleworep.c b/src/lm_sampleworep.c index da236994..bb145e61 100644 --- a/src/lm_sampleworep.c +++ b/src/lm_sampleworep.c @@ -316,8 +316,11 @@ extern SEXP sampleworep_new(SEXP Y, SEXP X, SEXP Rweights, SEXP Rprobinit, } */ // Sample models - for (m = 1; m < k && pigamma[0] < 1.0; m++) { - // Rprintf("model %d, starting pigamma = %lf\n", m, pigamma[0]); + // add DOUBLE_EPS to pigamma[0] due to rounding issues with clang on + // fedora and solaris with clang with force.heredity + // https://github.com/merliseclyde/BAS/issues/38 + for (m = 1; m < k && (pigamma[0] + DOUBLE_EPS) < 1.0; m++) { +// Rprintf("model %d, starting pigamma = %lf\n", m, pigamma[0]); INTEGER(modeldim)[m] = 0; for (i = n; i < p; i++) { INTEGER(modeldim)[m] += model[vars[i].index]; diff --git a/tests/testthat/test-bas-glm.R b/tests/testthat/test-bas-glm.R index 32428d29..03873808 100644 --- a/tests/testthat/test-bas-glm.R +++ b/tests/testthat/test-bas-glm.R @@ -398,24 +398,41 @@ test_that("MCMC+BAS: missing MCMC.iterations and n.models arg", { # FIXME add issue? check that it works with other prior # with prior probabilities; i.e. failed with Jeffreys +# Checks for https://github.com/merliseclyde/BAS/issues/37 + +test_that("no data", { + data(Pima.tr, package="MASS") + expect_error(pima_BAS <- bas.glm(type ~ (bp + glu + npreg)^2, + data = Pima.tr[0,], method = "BAS", + betaprior = bic.prior(), + family = binomial(), update=NULL, + modelprior =uniform(), + force.heredity=TRUE) + ) + +}) + +# Checks for https://github.com/merliseclyde/BAS/issues/38 + test_that("herdity and BAS", { + set.seed(2) data(Pima.tr, package="MASS") pima_BAS <- bas.glm(type ~ (bp + glu + npreg)^2, data = Pima.tr, method = "BAS", betaprior = bic.prior(), - family = binomial(), + family = binomial(), update=NULL, modelprior =uniform(), - update = 5, force.heredity=TRUE) - pima_BAS_no <- bas.glm(type ~ (bp + glu + npreg)^2, + pima_BAS_no <- bas.glm(type ~ (bp + glu + npreg)^2, data = Pima.tr, method = "BAS", betaprior = bic.prior(), - family = binomial(), + family = binomial(), update=NULL, modelprior =uniform(), - update = 5, force.heredity=FALSE) pima_BAS_no <- force.heredity.bas(pima_BAS_no) expect_equal(0L, sum(pima_BAS$probne0 > 1.0)) expect_equal(0L, sum(pima_BAS_no$probne0[-1] > 1.0)) expect_equal(pima_BAS$probne0, pima_BAS_no$probne0) + expect_equal(pima_BAS$n.models, pima_BAS_no$n.models) + expect_equal(0L, sum(duplicated(pima_BAS$which))) }) diff --git a/tests/testthat/test-bas-lm.R b/tests/testthat/test-bas-lm.R index b3e8b0f2..768e70f6 100644 --- a/tests/testthat/test-bas-lm.R +++ b/tests/testthat/test-bas-lm.R @@ -221,7 +221,7 @@ test_that("force.heredity", { expect_equal(basObj$probne0, basObj.old$probne0) }) -test_that("interactions", { +test_that("interactions & heredity", { data(Hald) bas_hald <- bas.lm(Y ~ .^2, data=Hald, method="MCMC", force.heredity = TRUE) @@ -235,7 +235,45 @@ test_that("interactions", { bas_hald$n.models) }) +test_that("sample size zero", { + data(Hald) + expect_error(bas_hald <- bas.lm(Y ~ .^2, data=Hald[0,], method="MCMC", + force.heredity = TRUE)) + +}) +# add DOUBLE_EPS to fix issue in +# https://github.com/merliseclyde/BAS/issues/38 +test_that("herdity and bas.lm", { + set.seed(2) + data(Pima.tr, package="MASS") + pima_BAS <- bas.lm(as.numeric(type) ~ (bp + glu + npreg)^2, + data = Pima.tr, method = "BAS", + prior = "BIC", + update = NULL, + modelprior = uniform(), + force.heredity = TRUE) + pima_BAS_no <- bas.lm(as.numeric(type) ~ (bp + glu + npreg)^2, + data = Pima.tr, method = "deterministic", + prior = "BIC", + update = NULL, + modelprior = uniform(), + force.heredity = FALSE) + pima_BAS_no <- force.heredity.bas(pima_BAS_no) + expect_equal(0L, sum(pima_BAS$probne0 > 1.0)) + expect_equal(0L, sum(pima_BAS_no$probne0[-1] > 1.0)) + expect_equal(pima_BAS$probne0, pima_BAS_no$probne0) + expect_equal(pima_BAS$n.models, pima_BAS_no$n.models) + expect_equal(0L, sum(duplicated(pima_BAS$which))) + pima_BAS_mcmc = bas.lm(as.numeric(type) ~ (bp + glu + npreg)^2, + data = Pima.tr, method = "MCMC", + prior = "BIC", + update = NULL, + MCMC.iterations = 10000, + modelprior = uniform(), + force.heredity = TRUE) + expect_equal(0L, sum(duplicated(pima_BAS_mcmc$which))) +}) # add more testing for update.bas #