Merge pull request #135 from USEPA/62-bidirectional-summary-stats

62 bidirectional summary stats
USEPA · Aug 24, 2023 · 6e482f8 · 6e482f8
2 parents 8af642d + 9c30b65
commit 6e482f8
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 35 deletions.
diff --git a/R/mc4.R b/R/mc4.R
@@ -99,19 +99,15 @@ mc4 <- function(ae, wr = FALSE) {
   if (check_tcpl_db_schema()) {
     # if we're using v3 schema we want to tcplfit2
 
-    # check to see if we specified fit models or bidirectional for tcplfit2
+    # check to see if we specified fit models for tcplfit2
     fitmodels <- c("cnst", "hill", "gnls", "poly1", "poly2", "pow", "exp2", "exp3", "exp4", "exp5")
-    bidirectional <- TRUE
     if ("fitmodels" %in% names(dat)) {
       #extract the fitmodels from dat and pass to fitting
       fitmodels <- unique(dat$fitmodels)[[1]]
 
     }
-    if("bidirectional" %in% names(dat)){
-      bidirectional <- unique(dat$bidirectional)[[1]]
-    }
 
-    dat <- tcplFit2(dat, fitmodels = fitmodels,bidirectional = bidirectional )
+    dat <- tcplFit2(dat, fitmodels = fitmodels)
 
 
 

diff --git a/R/mc5.R b/R/mc5.R
@@ -105,8 +105,11 @@ mc5 <- function(ae, wr = FALSE) {
     cutoff <- max(dat$coff)
     #can remove this once loading of data is working correctly
     dat <- tcplQuery(paste0("SELECT  
-    `mc4`.`m4id`,    `mc4`.`aeid`,    `mc4`.`spid`,    `mc4`.`bmad`,    `mc4`.`resp_max`,    `mc4`.`resp_min`,    `mc4`.`max_mean`,    `mc4`.`max_mean_conc`,
-    `mc4`.`max_med`,    `mc4`.`max_med_conc`,    `mc4`.`logc_max`,    `mc4`.`logc_min`,    `mc4`.`nconc`,    `mc4`.`npts`,    `mc4`.`nrep`,    `mc4`.`nmed_gtbl`,
+    `mc4`.`m4id`,    `mc4`.`aeid`,    `mc4`.`spid`,    `mc4`.`bmad`,    `mc4`.`resp_max`,    `mc4`.`resp_min`,
+    `mc4`.`max_mean`,    `mc4`.`max_mean_conc`,    `mc4`.`min_mean`,    `mc4`.`min_mean_conc`,
+    `mc4`.`max_med`,    `mc4`.`max_med_conc`,    `mc4`.`min_med`,    `mc4`.`min_med_conc`,    
+    `mc4`.`max_med_diff`,    `mc4`.`max_med_diff_conc`,    `mc4`.`logc_max`,    `mc4`.`logc_min`,    `mc4`.`nconc`,
+    `mc4`.`npts`,    `mc4`.`nrep`,    `mc4`.`nmed_gtbl_pos`,    `mc4`.`nmed_gtbl_neg`,
     `mc4`.`tmpi`,	  `mc4_param`.`model`,    `mc4_param`.`model_param`,    `mc4_param`.`model_val`
     FROM mc4 inner join mc4_param on mc4.m4id = mc4_param.m4id where mc4.aeid = ",ae,";"))
     # if we're using v3 schema we want to tcplfit2

diff --git a/R/mc6_mthds.R b/R/mc6_mthds.R
@@ -64,8 +64,14 @@
 #'   viability assay with winning model is gain-loss (gnls); if hitc >= 0.9, modl = "gnls" and 
 #'   cell_viability_assay = 1, then flag.}
 #'   \item{no.med.gt.3bmad}{Flag series where no median response values are greater than baseline as 
-#'   defined by 3 times the baseline median absolute deviation (bmad); nmed_gtbl = 0, where 
-#'   nmed_gtbl is the number of medians greater than 3 * bmad.}
+#'   defined by 3 times the baseline median absolute deviation (bmad); nmed_gtbl_pos and
+#'   nmed_gtbl_neg both = 0, where nmed_gtbl_pos/_neg is the number of medians greater than 3 * 
+#'   bmad/less than -3 * bmad.}
+#'   \item{no.med.single.dir.gt.3bmad}{Flag series where no median response values in the intended
+#'   fit direction are greater than baseline as defined by 3 times the baseline median absolute
+#'   deviation (bmad); Depending on intended direction, either nmed_gtbl_pos or nmed_gtbl_neg are
+#'   = 0, where nmed_gtbl_pos/_neg is the number of medians greater than 3 * bmad/less than -3 
+#'   * bmad.}
 #' }
 #' 
 #' @note
@@ -149,8 +155,8 @@ mc6_mthds <- function() {
                 "flag")
       init <- bquote(list(.(mthd), .(flag), FALSE))
       e1 <- bquote(ft[ , .(c(out[4:5], "test")) := .(init)])
-      e2 <- bquote(ft[ , lstc := max_med_conc == logc_max])
-      e3 <- bquote(ft[ , test := nmed_gtbl == 1 & hitc >= 0.9 & lstc])
+      e2 <- bquote(ft[ , lstc := max_med_diff_conc == logc_max])
+      e3 <- bquote(ft[ , test := (nmed_gtbl_pos == 1 | nmed_gtbl_neg == 1) & hitc >= 0.9 & lstc])
       e4 <- bquote(f[[.(mthd)]] <- ft[which(test), .SD, .SDcols = .(out)])
       cr <- c("mc6_mthd_id", "flag", "test", "lstc")
       e5 <- bquote(ft[ , .(cr) := NULL])
@@ -165,8 +171,8 @@ mc6_mthds <- function() {
                 "flag")
       init <- bquote(list(.(mthd), .(flag), FALSE))
       e1 <- bquote(ft[ , .(c(out[4:5], "test")) := .(init)])
-      e2 <- bquote(ft[ , lstc := max_med_conc == logc_max])
-      e3 <- bquote(ft[ , test := nmed_gtbl == 1 & hitc >= 0.9 & !lstc])
+      e2 <- bquote(ft[ , lstc := max_med_diff_conc == logc_max])
+      e3 <- bquote(ft[ , test := (nmed_gtbl_pos == 1 | nmed_gtbl_neg == 1) & hitc >= 0.9 & !lstc])
       e4 <- bquote(f[[.(mthd)]] <- ft[which(test), .SD, .SDcols = .(out)])
       cr <- c("mc6_mthd_id", "flag", "test", "lstc")
       e5 <- bquote(ft[ , .(cr) := NULL])
@@ -181,7 +187,7 @@ mc6_mthds <- function() {
                 "flag")
       init <- bquote(list(.(mthd), .(flag), FALSE))
       e1 <- bquote(ft[ , .(c(out[4:5], "test")) := .(init)])
-      e2 <- bquote(ft[ , test := nmed_gtbl > 1 & hitc < 0.9])
+      e2 <- bquote(ft[ , test := (nmed_gtbl_pos > 1 | nmed_gtbl_neg > 1) & hitc < 0.9 & hitc >= 0])
       e3 <- bquote(f[[.(mthd)]] <- ft[which(test), .SD, .SDcols = .(out)])
       cr <- c("mc6_mthd_id", "flag", "test")
       e4 <- bquote(ft[ , .(cr) := NULL])
@@ -277,9 +283,9 @@ mc6_mthds <- function() {
       init <- bquote(list(.(mthd), .(flag), FALSE))
       e1 <- bquote(ft[ , .(c(out[4:5], "test")) := .(init)])
       e2 <- bquote(ft[hitc >= 0.9 & coff >= 5,
-                      test := top < 50 | max_med < 50])
+                      test := abs(top) < 50 | abs(max_med_diff) < 50])
       e3 <- bquote(ft[hitc >= 0.9 & coff < 5,
-                      test := top < log2(1.5) | max_med < log2(1.5)])
+                      test := abs(top) < log2(1.5) | abs(max_med_diff) < log2(1.5)])
       e4 <- bquote(f[[.(mthd)]] <- ft[which(test), .SD, .SDcols = .(out)])
       cr <- c("mc6_mthd_id", "flag", "test")
       e5 <- bquote(ft[ , .(cr) := NULL])
@@ -319,12 +325,30 @@ mc6_mthds <- function() {
 
     no.med.gt.3bmad = function(mthd) {
 
-      flag <- "Flag series where no median response values are greater than baseline as defined by 3 times the baseline median absolute deviation (bmad)"
+      flag <- "No median response values are greater than baseline as defined by 3 times the baseline median absolute deviation (bmad)"
       out  <- c("m5id", "m4id", "aeid", "mc6_mthd_id", 
                 "flag")
       init <- bquote(list(.(mthd), .(flag), FALSE))
       e1 <- bquote(ft[ , .(c(out[4:5], "test")) := .(init)])
-      e2 <- bquote(ft[ , test := nmed_gtbl == 0])
+      e2 <- bquote(ft[ , test := nmed_gtbl_pos == 0 & nmed_gtbl_neg == 0])
+      e3 <- bquote(f[[.(mthd)]] <- ft[which(test), .SD, .SDcols = .(out)])
+      cr <- c("mc6_mthd_id", "flag", "test")
+      e4 <- bquote(ft[ , .(cr) := NULL])
+      list(e1, e2, e3, e4)
+
+    },
+
+    no.med.single.dir.gt.3bmad = function(mthd) {
+
+      flag <- "No median response values in the intended fit direction are greater than baseline as defined by 3 times the baseline median absolute deviation (bmad)"
+      out  <- c("m5id", "m4id", "aeid", "mc6_mthd_id", 
+                "flag")
+      init <- bquote(list(.(mthd), .(flag), FALSE))
+      e1 <- bquote(ft[ , .(c(out[4:5], "test")) := .(init)])
+      e2 <- bquote(ft[ , test := (hitc > 0 & top > 0 & nmed_gtbl_pos == 0) | 
+                                 (hitc < 0 & top > 0 & nmed_gtbl_neg == 0) | 
+                                 (hitc > 0 & top < 0 & nmed_gtbl_neg == 0) | 
+                                 (hitc < 0 & top < 0 & nmed_gtbl_pos == 0)])
       e3 <- bquote(f[[.(mthd)]] <- ft[which(test), .SD, .SDcols = .(out)])
       cr <- c("mc6_mthd_id", "flag", "test")
       e4 <- bquote(ft[ , .(cr) := NULL])

diff --git a/R/tcplFit2.R b/R/tcplFit2.R
@@ -3,37 +3,45 @@
 #' @param dat output from level 3 processing
 #' @param fitmodels list of the models that should be fit with the data
 #' @param bmed baseline value, typically should be 0
-#' @param bidirectional boolean, default is TRUE (bidirectional fitting)
 #'
 #' @return Data.table with an additional column fitparams that includes all of the fitting parameters
 #' @importFrom tcplfit2 tcplfit2_core
 tcplFit2 <- function(dat,
                      fitmodels = c("cnst", "hill", "gnls", "poly1", "poly2", "pow", "exp2", "exp3", "exp4", "exp5"),
-                     bmed = NULL,
-                     bidirectional = TRUE) {
+                     bmed = NULL) {
   #variable binding
   resp  <-bmad  <-aeid  <-osd  <-m3id <- concentration_unlogged  <-response <- NULL
   # do all the regular fitting things that still need to be done
-  res <- dat[, `:=`(c("rmns", "rmds", "nconcs", "med_rmds"), {
+  res <- dat[, `:=`(c("rmns", "rmds", "nconcs", "med_rmds_pos", "med_rmds_neg"), {
     rmns <- mean(resp)
     rmds <- median(resp)
     nconcs <- .N
-    med_rmds <- rmds >= (3 * bmad)
-    .(rmns, rmds, nconcs, med_rmds)
+    med_rmds_pos <- rmds >= (3 * bmad)
+    med_rmds_neg <- rmds <= (-3 * bmad)
+    .(rmns, rmds, nconcs, med_rmds_pos, med_rmds_neg)
   }), keyby = .(aeid, spid, logc)][, .(
-    bmad = min(bmad), resp_max = max(resp), osd = min(osd), bmed = ifelse(is.null(bmed), 0, max(bmed)),
-    resp_min = min(resp), max_mean = max(rmns), max_mean_conc = logc[which.max(rmns)],
+    bmad = min(bmad), osd = min(osd), bmed = ifelse(is.null(bmed), 0, max(bmed)),
+    resp_max = max(resp), resp_min = min(resp),
+    max_mean = max(rmns), max_mean_conc = logc[which.max(rmns)],
     max_med = max(rmds), max_med_conc = logc[which.max(rmds)],
+    min_mean = min(rmns), min_mean_conc = logc[which.min(rmns)],
+    min_med = min(rmds), min_med_conc = logc[which.min(rmds)],
     logc_max = max(logc), logc_min = min(logc), nconc = length(unique(logc)),
-    npts = .N, nrep = median(as.numeric(nconcs)), nmed_gtbl = sum(med_rmds) / first(nconcs),
+    npts = .N, nrep = median(as.numeric(nconcs)), 
+    nmed_gtbl_pos = sum(med_rmds_pos) / first(nconcs), 
+    nmed_gtbl_neg = sum(med_rmds_neg) / first(nconcs),
     concentration_unlogged = list(10^(logc)), response = list(resp), m3ids = list(m3id)
   ),
   keyby = .(aeid, spid)
-  ][, `:=`(tmpi = seq_len(.N)), keyby = .(aeid)][,
+  ][, `:=`(c("max_med_diff", "max_med_diff_conc"), {
+    max_med_diff <- ifelse(abs(max_med) > abs(min_med), max_med, min_med)
+    max_med_diff_conc <- ifelse(abs(max_med) > abs(min_med), max_med_conc, min_med_conc)
+    .(max_med_diff, max_med_diff_conc)
+  })][, `:=`(tmpi = seq_len(.N)), keyby = .(aeid)][,
     `:=`(fitparams = list(tcplfit2::tcplfit2_core(unlist(concentration_unlogged),
       unlist(response),
       cutoff = bmad,
-      bidirectional = bidirectional,
+      bidirectional = TRUE,
       verbose = FALSE, force.fit = TRUE,
       fitmodels = fitmodels
     ))),

diff --git a/R/tcplLoadData.R b/R/tcplLoadData.R
@@ -416,14 +416,21 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
         resp_min,
         max_mean,
         max_mean_conc,
+        min_mean,
+        min_mean_conc,
         max_med,
         max_med_conc,
+        min_med,
+        min_med_conc,
+        max_med_diff,
+        max_med_diff_conc,
         logc_max,
         logc_min,
         nconc,
         npts,
         nrep,
-        nmed_gtbl
+        nmed_gtbl_pos,
+        nmed_gtbl_neg,
         FROM
         mc4
         "
@@ -440,14 +447,21 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
         resp_min,
         max_mean,
         max_mean_conc,
+        min_mean,
+        min_mean_conc,
         max_med,
         max_med_conc,
+        min_med,
+        min_med_conc,
+        max_med_diff,
+        max_med_diff_conc,
         logc_max,
         logc_min,
         nconc,
         npts,
         nrep,
-        nmed_gtbl,
+        nmed_gtbl_pos,
+        nmed_gtbl_neg,
         model,
         model_param,
         model_val
@@ -536,14 +550,21 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
         resp_min,
         max_mean,
         max_mean_conc,
+        min_mean,
+        min_mean_conc,
         max_med,
         max_med_conc,
+        min_med,
+        min_med_conc,
+        max_med_diff,
+        max_med_diff_conc,
         logc_max,
         logc_min,
         nconc,
         npts,
         nrep,
-        nmed_gtbl,
+        nmed_gtbl_pos,
+        nmed_gtbl_neg,
         hitc,
         modl,
         fitc,
@@ -568,14 +589,21 @@ tcplLoadData <- function(lvl, fld = NULL, val = NULL, type = "mc", add.fld = TRU
         resp_min,
         max_mean,
         max_mean_conc,
+        min_mean,
+        min_mean_conc,
         max_med,
         max_med_conc,
+        min_med,
+        min_med_conc,
+        max_med_diff,
+        max_med_diff_conc,
         logc_max,
         logc_min,
         nconc,
         npts,
         nrep,
-        nmed_gtbl,
+        nmed_gtbl_pos,
+        nmed_gtbl_neg,
         hitc,
         modl,
         fitc,

diff --git a/R/v3_schema_functions.R b/R/v3_schema_functions.R
@@ -12,14 +12,21 @@ write_lvl_4 <- function(dat){
                 "resp_min",
                 "max_mean",
                 "max_mean_conc",
+                "min_mean",
+                "min_mean_conc",
                 "max_med",
                 "max_med_conc",
+                "min_med",
+                "min_med_conc",
+                "max_med_diff",
+                "max_med_diff_conc",
                 "logc_max",
                 "logc_min",
                 "nconc",
                 "npts",
                 "nrep",
-                "nmed_gtbl",
+                "nmed_gtbl_pos",
+                "nmed_gtbl_neg",
                 "tmpi")
   mc4_agg_cols <- c(paste0("m", 0:4, "id"), "aeid")