Skip to content

Commit

Permalink
adding minCellCount to database upload
Browse files Browse the repository at this point in the history
- adding minCellCount to database upload
- adding test for min cell count
  • Loading branch information
jreps committed May 19, 2023
1 parent bdaad07 commit 5e5c401
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 8 deletions.
40 changes: 38 additions & 2 deletions R/Database.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ insertAndromedaToDatabase <- function(
andromedaObject,
tempEmulationSchema,
bulkLoad = T,
tablePrefix = 'c_'
tablePrefix = 'c_',
minCellCount = 0,
minCellCountColumns = list()
){
errorMessages <- checkmate::makeAssertCollection()
.checkTablePrefix(
Expand All @@ -78,11 +80,19 @@ insertAndromedaToDatabase <- function(
Andromeda::batchApply(
tbl = andromedaObject,
fun = function(x){

data <- as.data.frame(x %>% dplyr::collect()) # apply minCellCount
data <- removeMinCell(
data = data,
minCellCount = minCellCount,
minCellCountColumns = minCellCountColumns
)

DatabaseConnector::insertTable(
connection = connection,
databaseSchema = databaseSchema,
tableName = paste0(tablePrefix,tableName),
data = as.data.frame(x %>% dplyr::collect()),
data = data,
dropTableIfExists = F,
createTable = F,
tempEmulationSchema = tempEmulationSchema,
Expand All @@ -95,6 +105,32 @@ insertAndromedaToDatabase <- function(
return(TRUE)
}

# Censor small counts in selected columns of a data frame.
#
# Each element of `minCellCountColumns` is a character vector naming one or
# more columns of `data` that are censored together: when any of those columns
# holds a value below `minCellCount` in a row, every column of that group is
# set to -1 for that row.
#
# Args:
#   data: A data.frame containing the columns to check.
#   minCellCount: Threshold; values strictly less than this are censored.
#   minCellCountColumns: A list of character vectors of column names. An
#     empty list leaves `data` untouched.
#
# Returns:
#   `data` with the censored cells replaced by -1.
removeMinCell <- function(
    data,
    minCellCount = 0,
    minCellCountColumns = list()
) {
  for (columns in minCellCountColumns) {
    # One logical column per checked column, TRUE where the value is too small.
    belowMin <- as.matrix(data[, columns, drop = FALSE]) < minCellCount

    # A row is censored when any column of the group is below the threshold.
    # NA cells are not treated as being below it; without na.rm an NA would
    # propagate into the condition below and abort with an error.
    ind <- rowSums(belowMin, na.rm = TRUE) > 0

    if (any(ind)) {
      ParallelLogger::logInfo(
        paste0(
          'Removing values less than ',
          minCellCount,
          ' from ',
          paste(columns, collapse = ' and ')
        )
      )
      # -1 marks a censored cell; all columns of the group are overwritten.
      data[ind, columns] <- -1
    }
  }
  return(data)
}



Expand Down
30 changes: 25 additions & 5 deletions R/RunCharacterization.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ loadCharacterizationSettings <- function(
#' @param tablePrefix A string to append the tables in the results
#' @param databaseId The unique identifier for the cdm database
#' @param showSubjectId Whether to include subjectId of failed rechallenge case series or hide
#' @param minCellCount The minimum cell count; count values below this are replaced with -1 when the results are saved to the database
#'
#' @return
#' An sqlite database with the results is saved into the saveDirectory and a csv file named tacker.csv
Expand All @@ -142,7 +143,8 @@ runCharacterizationAnalyses <- function(
saveDirectory,
tablePrefix = "c_",
databaseId = "1",
showSubjectId = F
showSubjectId = F,
minCellCount = 0
) {
# inputs checks
errorMessages <- checkmate::makeAssertCollection()
Expand Down Expand Up @@ -218,7 +220,9 @@ runCharacterizationAnalyses <- function(
databaseSchema = "main",
tableName = "time_to_event",
andromedaObject = result$timeToEvent,
tablePrefix = tablePrefix
tablePrefix = tablePrefix,
minCellCount = minCellCount,
minCellCountColumns = list('numEvents')
)
}

Expand Down Expand Up @@ -266,7 +270,15 @@ runCharacterizationAnalyses <- function(
databaseSchema = "main",
tableName = "dechallenge_rechallenge",
andromedaObject = result$dechallengeRechallenge,
tablePrefix = tablePrefix
tablePrefix = tablePrefix,
minCellCount = minCellCount,
minCellCountColumns = list(
c('numEvents'),
c('dechallengeAttempt'),
c('dechallengeFail', 'dechallengeSuccess'),
c('rechallengeAttempt'),
c('rechallengeFail', 'rechallengeSuccess')
)
)
}

Expand Down Expand Up @@ -401,7 +413,11 @@ runCharacterizationAnalyses <- function(
databaseSchema = "main",
tableName = "covariates",
andromedaObject = result$covariates,
tablePrefix = tablePrefix
tablePrefix = tablePrefix,
minCellCount = minCellCount,
minCellCountColumns = list(
c('sumValue') #c('SUM_VALUE') #AVERAGE_VALUE
)
)
}

Expand All @@ -411,7 +427,11 @@ runCharacterizationAnalyses <- function(
databaseSchema = "main",
tableName = "covariates_continuous",
andromedaObject = result$covariatesContinuous,
tablePrefix = tablePrefix
tablePrefix = tablePrefix,
minCellCount = minCellCount,
minCellCountColumns = list(
c('countValue')
)
)
}
}
Expand Down
5 changes: 4 additions & 1 deletion man/runCharacterizationAnalyses.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions tests/testthat/test-Database.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
context("Database")

# removeMinCell() should replace values below minCellCount with -1 in the
# requested columns, treating each character vector in minCellCountColumns as
# a group of columns that is censored together, and leave everything else as is.
test_that("removeMinCell", {
  data <- data.frame(name = rep('dfd', 10), val = 1:10)

  # no columns requested: nothing changes
  newData <- removeMinCell(
    data = data, minCellCount = 5, minCellCountColumns = list()
  )
  testthat::expect_equal(newData, data)

  # single column: only values below the threshold are censored
  newData <- removeMinCell(
    data = data, minCellCount = 5, minCellCountColumns = list('val')
  )
  testthat::expect_equal(newData$val[5:10], data$val[5:10])
  testthat::expect_equal(newData$val[1:4], rep(-1, 4))

  # threshold above every value: the whole column is censored
  newData <- removeMinCell(
    data = data, minCellCount = 50, minCellCountColumns = list('val')
  )
  testthat::expect_equal(newData$val, rep(-1, 10))

  # columns that were not requested are left alone
  data <- data.frame(
    name = rep('dfd', 10),
    val = 1:10,
    val2 = c(1, 10, 1, 10, 1, 10, 1, 10, 1, 10)
  )
  newData <- removeMinCell(
    data = data, minCellCount = 5, minCellCountColumns = list('val')
  )
  testthat::expect_equal(newData$val[1:4], rep(-1, 4))
  testthat::expect_equal(newData$val[5:10], data$val[5:10])
  testthat::expect_equal(newData$val2, data$val2)
  testthat::expect_equal(newData$name, data$name)

  # grouped columns: a small value in either column censors both
  newData <- removeMinCell(
    data = data, minCellCount = 5, minCellCountColumns = list(c('val', 'val2'))
  )
  testthat::expect_equal(
    sum(
      (newData$val > 0 & newData$val < 5) | (newData$val2 > 0 & newData$val2 < 5)
    ),
    0
  )
  # rows 6, 8 and 10 are the only ones where both columns are >= 5
  testthat::expect_equal(newData$val, c(-1, -1, -1, -1, -1, 6, -1, 8, -1, 10))
  testthat::expect_equal(newData$val2, c(-1, -1, -1, -1, -1, 10, -1, 10, -1, 10))

  # several groups are censored independently of each other
  data <- data.frame(
    name = rep('dfd', 10),
    val = 1:10,
    val2 = c(1, 10, 1, 10, 1, 10, 1, 10, 1, 10),
    val3 = c(10, 10, 10, 10, 10, 1, 1, 10, 10, 10)
  )
  newData <- removeMinCell(
    data = data,
    minCellCount = 5,
    minCellCountColumns =
      list(c('val', 'val2'), 'val3')
  )
  testthat::expect_equal(
    sum(
      (newData$val > 0 & newData$val < 5) |
        (newData$val2 > 0 & newData$val2 < 5) |
        (newData$val3 > 0 & newData$val3 < 5)
    ),
    0
  )
  testthat::expect_equal(newData$val, c(-1, -1, -1, -1, -1, 6, -1, 8, -1, 10))
  testthat::expect_equal(newData$val2, c(-1, -1, -1, -1, -1, 10, -1, 10, -1, 10))
  testthat::expect_equal(newData$val3, c(10, 10, 10, 10, 10, -1, -1, 10, 10, 10))

})

0 comments on commit 5e5c401

Please sign in to comment.