Skip to content

Commit

Permalink
42 add option for export ready status in tcplsubsetchid (#110)
Browse files Browse the repository at this point in the history
* added export ready filter to tcplsubsetchid where default will not allow a representative sample to be selected if the assay is not export ready 1

* updated styling

* updated docs
  • Loading branch information
brown-jason authored Aug 3, 2023
1 parent 86b6fb6 commit d59dd85
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 101 deletions.
187 changes: 101 additions & 86 deletions R/tcplSubsetChid.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,154 +3,169 @@
#-------------------------------------------------------------------------------

#' @title Subset level 5 data to a single sample per chemical
#'
#'
#' @description
#' \code{tcplSubsetChid} subsets level 5 data to a single tested sample per
#' \code{tcplSubsetChid} subsets level 5 data to a single tested sample per
#' chemical. In other words, if a chemical is tested more than once (a chid
#' has more than one spid) for a given assay endpoint, the function uses a
#' has more than one spid) for a given assay endpoint, the function uses a
#' series of logic to select a single "representative" sample.
#'
#'
#' @param dat data.table, a data.table with level 5 data
#' @param flag Integer, the mc6_mthd_id values to go into the flag count, see
#' details for more information
#' @param type Character of length 1, the data type, "sc" or "mc"
#'
#' @param export_ready Boolean, default TRUE, should only export ready 1 values be included in calculation
#'
#' @details
#' \code{tcplSubsetChid} is intended to work with level 5 data that has
#' \code{tcplSubsetChid} is intended to work with level 5 data that has
#' chemical and assay information mapped with \code{\link{tcplPrepOtpt}}.
#'
#' To select a single sample, first a "consensus hit-call" is made by majority
#' rule, with ties defaulting to active. After the chemical-wise hit call is
#' made, the samples corresponding to to chemical-wise hit call are logically
#' ordered using the fit category, the number of the flags, and the modl_ga,
#'
#' To select a single sample, first a "consensus hit-call" is made by majority
#' rule, with ties defaulting to active. After the chemical-wise hit call is
#' made, the samples corresponding to to chemical-wise hit call are logically
#' ordered using the fit category, the number of the flags, and the modl_ga,
#' then the first sample for every chemical is selected.
#'
#' The \code{flag} param can be used to specify a subset of flags to be used in
#'
#' The \code{flag} param can be used to specify a subset of flags to be used in
#' the flag count. Leaving \code{flag} TRUE utilize all the available flags.
#' Setting \code{flag} to \code{FALSE} will do the subsetting without
#' Setting \code{flag} to \code{FALSE} will do the subsetting without
#' considering any flags.
#'
#'
#' @examples
#' ## Store the current config settings, so they can be reloaded at the end
#' ## Store the current config settings, so they can be reloaded at the end
#' ## of the examples
#' conf_store <- tcplConfList()
#' tcplConfExample()
#'
#'
#' ## Load the example level 5 data
#' d1 <- tcplLoadData(lvl = 5, fld = "aeid", val = 797)
#' d1 <- tcplPrepOtpt(d1)
#'
#'
#' ## Subset to an example of a duplicated chid
#' d2 <- d1[chid == 20182]
#' d2[ , list(m4id, hitc, fitc, modl_ga)]
#'
#' ## Here the consensus hit-call is 1 (active), and the fit categories are
#' d2[, list(m4id, hitc, fitc, modl_ga)]
#'
#' ## Here the consensus hit-call is 1 (active), and the fit categories are
#' ## all equal. Therefore, if the flags are ignored, the selected sample will
#' ## be the sample with the lowest modl_ga.
#' tcplSubsetChid(dat = d2, flag = FALSE)[ , list(m4id, modl_ga)]
#'
#' tcplSubsetChid(dat = d2, flag = FALSE)[, list(m4id, modl_ga)]
#'
#' ## Reset configuration
#' options(conf_store)
#'
#' @return A data.table with a single sample for every given chemical-assay
#' pair.
#'
#'
#' @return A data.table with a single sample for every given chemical-assay
#' pair.
#'
#' @seealso \code{\link{tcplPrepOtpt}}
#'
#'
#' @import data.table
#' @export

tcplSubsetChid <- function(dat, flag = TRUE, type = "mc") {

tcplSubsetChid <- function(dat, flag = TRUE, type = "mc", export_ready = TRUE) {
## Variable-binding to pass R CMD Check
chit <- hitc <- aeid <- casn <- fitc <- fitc.ordr <- m4id <- nflg <- NULL
chid <- logc <- minc <- NULL
if (!type %in% c("mc","sc")) {

if (!type %in% c("mc", "sc")) {
stop("type must be sc (single concentration) or mc (multi-concentration)")
}

if(type == "mc"){

# if only using export ready, filter data by export ready status
if (export_ready) {
# try catch here to make sure db has available annotation information
tryCatch(
{
assay_info <- tcplLoadAeid(fld = "aeid", val = dat$aeid, add.fld = "export_ready")
},
error = function(error) {
message(error)
if (grepl("Not all given fields available in query", error)) stop(paste0("\n'export_ready' column is not available in ", options()$TCPL_DB, ". Consider setting export_ready to FALSE."))
}
)

# join assay information
dat <- dat[assay_info, on = .(aeid)]
# filter export ready column
dat <- dat[export_ready == 1]
}

if (type == "mc") {
if (!"m5id" %in% names(dat)) {
stop("'dat' must be a data.table with level 5 data. See ?tcplLoadData for",
" more information.")
stop(
"'dat' must be a data.table with level 5 data. See ?tcplLoadData for",
" more information."
)
}
if (!"casn" %in% names(dat)) dat <- tcplPrepOtpt(dat)
#for now we treat all >=.9 hitcall equally
# for now we treat all >=.9 hitcall equally
dat[, hitc := hitc >= .9]
dat[ , chit := mean(hitc[hitc %in% 0:1]) >= 0.5, by = list(aeid, chid)]
dat[, chit := mean(hitc[hitc %in% 0:1]) >= 0.5, by = list(aeid, chid)]
dat <- dat[hitc == chit | (is.na(chit) & (hitc == -1 | is.na(m4id)))]
dat[ , fitc.ordr := NA_integer_]


dat[, fitc.ordr := NA_integer_]
dat[fitc %in% c(37, 41, 46, 50), fitc.ordr := 0]
dat[fitc %in% c(38, 42, 47, 51), fitc.ordr := 1]
dat[fitc %in% c(36, 40, 45, 49), fitc.ordr := 2]
if (is.null(flag)) flag <- TRUE

if (flag[1] | length(flag) > 1) {

tst <- is.logical(flag)
prs <- if (tst) list() else list(fld = "mc6_mthd_id", val = flag)
prs <- if (tst) list() else list(fld = "mc6_mthd_id", val = flag)
flg <- do.call(tcplLoadData, c(lvl = 6L, prs))
flg <- flg[ , list(nflg = .N), by = m4id]
flg <- flg[, list(nflg = .N), by = m4id]
setkey(flg, m4id)
setkey(dat, m4id)

dat <- flg[dat]
dat[is.na(nflg), nflg := 0]

} else {

dat[ , nflg := FALSE]

dat[, nflg := FALSE]
}
#setkeyv(dat, c("aeid", "chid", "fitc.ordr", "nflg", "modl_ga"))
ac50var <- ifelse(check_tcpl_db_schema(),"ac50","modl_ga")
setorderv(dat, c("aeid", "chid", "fitc.ordr", "nflg", ac50var,"max_med"),c(1,1,1,1,1,-1), na.last = TRUE)
min_modl_ga <- dat[ , list(ind = .I[1]), by = list(aeid, casn)]

# setkeyv(dat, c("aeid", "chid", "fitc.ordr", "nflg", "modl_ga"))
ac50var <- ifelse(check_tcpl_db_schema(), "ac50", "modl_ga")
setorderv(dat, c("aeid", "chid", "fitc.ordr", "nflg", ac50var, "max_med"), c(1, 1, 1, 1, 1, -1), na.last = TRUE)

min_modl_ga <- dat[, list(ind = .I[1]), by = list(aeid, casn)]

dat <- dat[min_modl_ga$ind]

return(dat[])
}
if(type == "sc"){

if (type == "sc") {
if (!"s2id" %in% names(dat)) {
stop("'dat' must be a single concentration data.table with level 2 data. See ?tcplLoadData for",
" more information.")
stop(
"'dat' must be a single concentration data.table with level 2 data. See ?tcplLoadData for",
" more information."
)
}
if (!"casn" %in% names(dat)) dat <- tcplPrepOtpt(dat)
dat[ , chit := mean(hitc[hitc %in% 0:1]) >= 0.5, by = list(aeid, chid)]
dat <- dat[hitc == chit ]
#select those that are at lowest concentration in the consensus hitc
dat1 <- tcplLoadData("agg",fld = "s2id", val = dat$s2id,type = "sc")
setkey(dat1,"s2id")
setkey(dat,"s2id")

dat[, chit := mean(hitc[hitc %in% 0:1]) >= 0.5, by = list(aeid, chid)]
dat <- dat[hitc == chit]

# select those that are at lowest concentration in the consensus hitc
dat1 <- tcplLoadData("agg", fld = "s2id", val = dat$s2id, type = "sc")
setkey(dat1, "s2id")
setkey(dat, "s2id")
dat <- dat[dat1]
setkeyv(dat,c("aeid","chid","logc"))
dat[ , minc := min(logc), by = list(aeid, chid)]
dat <- dat[logc == minc ]
dat <- unique(dat[,c("spid","chid","casn","chnm","dsstox_substance_id","code","aeid","aenm","s2id","bmad","max_med","hitc","coff","resp_unit")])

setkeyv(dat, c("aeid", "chid", "logc"))
dat[, minc := min(logc), by = list(aeid, chid)]
dat <- dat[logc == minc]
dat <- unique(dat[, c("spid", "chid", "casn", "chnm", "dsstox_substance_id", "code", "aeid", "aenm", "s2id", "bmad", "max_med", "hitc", "coff", "resp_unit")])
setkeyv(dat, c("aeid", "chid", "max_med"))
#select top max_med per casn
min_max_med <- dat[ , list(ind = .I[.N]), by = list(aeid, casn)]
#filter out all others

# select top max_med per casn
min_max_med <- dat[, list(ind = .I[.N]), by = list(aeid, casn)]

# filter out all others
dat <- dat[min_max_med$ind]
return(dat[] )

return(dat[])
}




}

#-------------------------------------------------------------------------------
32 changes: 17 additions & 15 deletions man/tcplSubsetChid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d59dd85

Please sign in to comment.