diff --git a/DESCRIPTION b/DESCRIPTION index abe0c9a..95569d5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: imcRtools -Version: 1.7.6 +Version: 1.7.7 Title: Methods for imaging mass cytometry data analysis Description: This R package supports the handling and analysis of imaging mass cytometry diff --git a/NEWS b/NEWS index 8b71d86..598c70e 100644 --- a/NEWS +++ b/NEWS @@ -205,3 +205,7 @@ Changes in version 1.7.6 (2023-09-26) + Updated example data in "inst" to newest version +Changes in version 1.7.7 (2023-10-02) + ++ Bug fix: in rare cases the testInteractions p-values were not correctly computed due to machine precision issues + diff --git a/R/testInteractions.R b/R/testInteractions.R index 4601a63..d0647b7 100644 --- a/R/testInteractions.R +++ b/R/testInteractions.R @@ -26,6 +26,9 @@ #' enriched or depleted per group. #' @param return_samples single logical indicating if the permuted interaction #' counts of all iterations should be returned. +#' @param tolerance single numeric larger than 0. This parameter defines the +#' difference between the permuted count and the actual counts at which both +#' are regarded as equal. Default taken from \code{all.equal}. #' @param BPPARAM parameters for parallelized processing. #' #' @section Counting and summarizing cell-cell interactions: @@ -161,13 +164,15 @@ testInteractions <- function(object, iter = 1000, p_threshold = 0.01, return_samples = FALSE, + tolerance = sqrt(.Machine$double.eps), BPPARAM = SerialParam()){ # Input check method <- match.arg(method) .valid.countInteractions.input(object, group_by, label, method, patch_size, colPairName) - .valid.testInteractions.input(iter, p_threshold, return_samples) + .valid.testInteractions.input(iter, p_threshold, return_samples, + tolerance) # Re-level group_by label if(is.factor(colData(object)[[group_by]])) { @@ -197,7 +202,8 @@ testInteractions <- function(object, cur_out <- .calc_p_vals(cur_count, cur_out, n_perm = iter, p_thres = p_threshold, - return_samples = return_samples) + return_samples = return_samples, + tolerance = tolerance) setorder(cur_out, "group_by", "from_label", "to_label") diff --git a/R/utils.R b/R/utils.R index b21a85d..a01aca8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -746,9 +746,9 @@ "to_label")] if (check_missing) { - dat_temp <- dat_temp[CJ(group_by = unique(dat_temp$group_by), - from_label = as.factor(levels(dat_temp$from_label)), - to_label = as.factor(levels(dat_temp$to_label))), + dat_temp <- dat_temp[CJ(group_by = unique(dat_table$group_by), + from_label = as.factor(levels(dat_table$from_label)), + to_label = as.factor(levels(dat_table$to_label))), on = c("group_by", "from_label", "to_label")] ct <- from_label <- to_label <- NULL dat_temp[is.na(dat_temp$ct), ct := 0] @@ -918,7 +918,8 @@ return(cur_out) } -.calc_p_vals<- function(dat_baseline, dat_perm, n_perm, p_thres, return_samples){ +.calc_p_vals<- function(dat_baseline, dat_perm, n_perm, p_thres, return_samples, + tolerance){ dat_perm <- merge(dat_perm, dat_baseline[, c("from_label", "to_label", "group_by", "ct")], @@ -931,10 +932,12 @@ dat_perm[, ':='(ct_perm = replace(ct_perm, is.na(ct_perm), 0), ct_obs = replace(ct_obs, is.na(ct_obs), 0))] + # We introduced a more lenient way of checking equality to avoid issues + # with machine precision dat_stat <- dat_perm[ , .(ct = mean(ct_obs), p_gt = ifelse(max(ct_obs) == 0, 1, - (sum(ct_perm >= ct_obs) + 1) / (n_perm + 1)), - p_lt = (n_perm - sum(ct_perm > ct_obs) + 1) / (n_perm + 1)), + (sum((ct_perm - ct_obs) > -tolerance) + 1) / (n_perm + 1)), + p_lt = (n_perm - sum((ct_perm - ct_obs) > tolerance) + 1) / (n_perm + 1)), by=c("group_by", "from_label", "to_label")] dat_stat[, interaction := p_gt < p_lt] diff --git a/R/validityChecks.R b/R/validityChecks.R index c679d2b..7222fb6 100644 --- a/R/validityChecks.R +++ b/R/validityChecks.R @@ -959,7 +959,8 @@ } } -.valid.testInteractions.input <- function(iter, p_threshold, return_samples){ +.valid.testInteractions.input <- function(iter, p_threshold, return_samples, + tolerance){ if (length(iter) != 1 | !is.numeric(iter)) { stop("'iter' must be a single positive numeric.") } @@ -979,6 +980,14 @@ if (length(return_samples) != 1 | !is.logical(return_samples)) { stop("'return_samples' must be a single logical.") } + + if (length(tolerance) != 1 | !is.numeric(tolerance)) { + stop("'tolerance' must be a single numeric.") + } + + if (tolerance < 0) { + stop("'tolerance' must be larger than 0.") + } } .valid.findBorderCells.input <- function(object, img_id, border_dist, coords){ diff --git a/man/testInteractions.Rd b/man/testInteractions.Rd index 28eea53..91a710e 100644 --- a/man/testInteractions.Rd +++ b/man/testInteractions.Rd @@ -14,6 +14,7 @@ testInteractions( iter = 1000, p_threshold = 0.01, return_samples = FALSE, + tolerance = sqrt(.Machine$double.eps), BPPARAM = SerialParam() ) } @@ -48,6 +49,10 @@ enriched or depleted per group.} \item{return_samples}{single logical indicating if the permuted interaction counts of all iterations should be returned.} +\item{tolerance}{single numeric larger than 0. This parameter defines the +difference between the permuted count and the actual counts at which both +are regarded as equal. Default taken from \code{all.equal}.} + \item{BPPARAM}{parameters for parallelized processing.} } \value{