Skip to content

Commit

Permalink
Fixes Open-Systems-Pharmacology#881 selection applied at SimulationSe…
Browse files Browse the repository at this point in the history
…t level

Deprecate evalDataFilter in favor of functions relying on dplyr::filter
  • Loading branch information
pchelle committed Oct 25, 2022
1 parent a66c997 commit 16c1eec
Show file tree
Hide file tree
Showing 7 changed files with 265 additions and 62 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ Imports:
R6,
ggplot2,
jsonlite,
ospsuite.utils (>= 1.3.0)
ospsuite.utils (>= 1.3.0),
dplyr
Suggests:
testthat (>= 2.1.0),
knitr,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ export(getPkAnalysisResultsFileNames)
export(getPopulationSensitivityAnalysisResultsFileNames)
export(getReportingEngineInfo)
export(getResiduals)
export(getSelectedData)
export(getSelectedRows)
export(getSimulationDescriptor)
export(getSimulationParameterDisplayPaths)
export(getSimulationResultFileNames)
Expand Down Expand Up @@ -160,6 +162,7 @@ export(startQualificationRunner)
export(trimFileName)
export(updateSimulationIndividualParameters)
export(vpcParameterPlot)
import(dplyr)
import(ggplot2)
import(jsonlite)
import(ospsuite)
Expand Down
131 changes: 114 additions & 17 deletions R/utilities-observed-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,106 @@ readObservedDataFile <- function(fileName,
return(observedData)
}

#' @title getSelectedData
#' @description
#' Get selected data
#' The function leverage `dplyr::filter` to select the data
#' @param data A data.frame
#' @param dataSelection Character string or expression evaluated to select data
#' The enum helper `DataSelectionKeys` provides keys for selected all or none of the data
#' @return A data.frame of selected data
#' @export
#' @import dplyr
#' @seealso DataSelectionKeys
#' @examples
#' data <- data.frame(
#' x = seq(0,9),
#' y = seq(10,19),
#' mdv = c(1,1, rep(0, 8)),
#' groups = rep(c("A", "B"), 5)
#' )
#'
#' # Select all the data
#' getSelectedData(data, DataSelectionKeys$ALL)
#'
#' # Select no data
#' getSelectedData(data, DataSelectionKeys$NONE)
#'
#' # Select data from group A
#' getSelectedData(data, "groups %in% 'A'")
#'
#' # Remove missing dependent variable (mdv)
#' getSelectedData(data, "mdv == 0")
#'
getSelectedData <- function(data, dataSelection) {
if(isEmpty(dataSelection)){
return(data[FALSE,])
}
if(isIncluded(dataSelection, DataSelectionKeys$ALL)){
return(data)
}
if(isIncluded(dataSelection, DataSelectionKeys$NONE)){
return(data[FALSE,])
}
if(isOfType(dataSelection, "expression")){
return(data %>% dplyr::filter(eval(dataSelection)))
}
return(data %>% dplyr::filter(eval(parse(text = dataSelection))))
}

#' @title getSelectedRows
#' @description
#' Get selected rows from data and its selection
#' The function leverage `dplyr::filter` to select the rows
#' @param data A data.frame
#' @param dataSelection Character string or expression evaluated to select data
#' The enum helper `DataSelectionKeys` provides keys for selected all or none of the data
#' @return A data.frame of selected data
#' @export
#' @import dplyr
#' @seealso DataSelectionKeys
#' @examples
#' data <- data.frame(
#' x = seq(0,9),
#' y = seq(10,19),
#' mdv = c(1,1, rep(0, 8)),
#' groups = rep(c("A", "B"), 5)
#' )
#'
#' # Select all the rows
#' getSelectedRows(data, DataSelectionKeys$ALL)
#'
#' # Select no row
#' getSelectedRows(data, DataSelectionKeys$NONE)
#'
#' # Select rows from group A
#' getSelectedData(data, "groups %in% 'A'")
#'
#' # Get rows of missing dependent variable (mdv)
#' getSelectedRows(data, "mdv == 0")
#'
getSelectedRows <- function(data, dataSelection) {
if(isEmpty(dataSelection)){
return(FALSE)
}
if(isIncluded(dataSelection, DataSelectionKeys$ALL)){
return(TRUE)
}
if(isIncluded(dataSelection, DataSelectionKeys$NONE)){
return(FALSE)
}
if(isOfType(dataSelection, "expression")){
selectedData <- data %>%
dplyr::mutate(rows = 1:n()) %>%
dplyr::filter(eval(dataSelection))
return(selectedData$rows)
}
selectedData <- data %>%
dplyr::mutate(rows = 1:n()) %>%
dplyr::filter(eval(parse(text = dataSelection)))
return(selectedData$rows)
}

#' @title evalDataFilter
#' @description
#' Evaluate a data filter by converting the variable names of the data.frame
Expand All @@ -95,6 +195,7 @@ readObservedDataFile <- function(fileName,
#' @return vector of logicals corresponding to the evaluation of the filter
#' @export
evalDataFilter <- function(data, filterExpression) {
.Deprecated("getSelectedRows")
variableNames <- names(data)
expressionList <- lapply(
variableNames,
Expand Down Expand Up @@ -146,6 +247,7 @@ loadObservedDataFromSimulationSet <- function(simulationSet) {

re.tStoreFileMetadata(access = "read", filePath = simulationSet$observedDataFile)
observedDataset <- readObservedDataFile(simulationSet$observedDataFile)
observedDataset <- getSelectedData(observedDataset, simulationSet$dataSelection)
re.tStoreFileMetadata(access = "read", filePath = simulationSet$observedMetaDataFile)
dictionary <- readObservedDataFile(simulationSet$observedMetaDataFile)

Expand Down Expand Up @@ -182,10 +284,10 @@ loadObservedDataFromSimulationSet <- function(simulationSet) {

# If unit was actually defined using output objects, overwrite current dvUnit
for (output in simulationSet$outputs) {
if (isOfLength(output$dataUnit, 0)) {
if (isEmpty(output$dataUnit)) {
next
}
selectedRows <- evalDataFilter(observedDataset, output$dataSelection)
selectedRows <- getSelectedRows(observedDataset, output$dataSelection)
observedDataset[selectedRows, dvUnitColumn] <- output$dataUnit
}

Expand Down Expand Up @@ -260,26 +362,21 @@ getObservedDataFromOutput <- function(output, data, dataMapping, molWeight, stru
if (isEmpty(data)) {
return()
}
if (isEmpty(output$dataSelection)) {
return()
}

selectedRows <- evalDataFilter(data, output$dataSelection)
logDebug(messages$selectedObservedDataForPath(output$path, sum(selectedRows)))
# If filter did not select any data, return empty dataset
if (sum(selectedRows) == 0) {
selectedData <- getSelectedData(data, output$dataSelection)
logDebug(messages$selectedObservedDataForPath(output$path, nrow(selectedData)))
if (isEmpty(selectedData)) {
return()
}

# Get dimensions of observed data
dvDimensions <- unique(as.character(data[selectedRows, dataMapping$dimension]))
outputConcentration <- data[selectedRows, dataMapping$dv]
dvDimensions <- unique(as.character(selectedData[, dataMapping$dimension]))
outputConcentration <- selectedData[, dataMapping$dv]
if (!isEmpty(output$displayUnit)) {
for (dvDimension in dvDimensions) {
if (is.na(dvDimension)) {
next
}
dvSelectedRows <- data[selectedRows, dataMapping$dimension] %in% dvDimension
dvSelectedRows <- selectedData[, dataMapping$dimension] %in% dvDimension
outputConcentration[dvSelectedRows] <- ospsuite::toUnit(
dvDimension,
outputConcentration[dvSelectedRows],
Expand All @@ -291,7 +388,7 @@ getObservedDataFromOutput <- function(output, data, dataMapping, molWeight, stru
outputData <- data.frame(
"Time" = ospsuite::toUnit(
"Time",
data[selectedRows, dataMapping$time],
selectedData[, dataMapping$time],
structureSet$simulationSet$timeUnit
),
"Concentration" = outputConcentration,
Expand All @@ -306,13 +403,13 @@ getObservedDataFromOutput <- function(output, data, dataMapping, molWeight, stru
return(list(data = outputData, lloq = NULL))
}

lloqConcentration <- data[selectedRows, dataMapping$lloq]
lloqConcentration <- selectedData[, dataMapping$lloq]
if (!isEmpty(output$displayUnit)) {
for (dvDimension in dvDimensions) {
if (is.na(dvDimension)) {
next
}
dvSelectedRows <- data[selectedRows, dataMapping$dimension] %in% dvDimension
dvSelectedRows <- selectedData[, dataMapping$dimension] %in% dvDimension
lloqConcentration[dvSelectedRows] <- ospsuite::toUnit(
dvDimension,
lloqConcentration[dvSelectedRows],
Expand All @@ -324,7 +421,7 @@ getObservedDataFromOutput <- function(output, data, dataMapping, molWeight, stru
lloqOutput <- data.frame(
"Time" = ospsuite::toUnit(
"Time",
data[selectedRows, dataMapping$time],
selectedData[, dataMapping$time],
structureSet$simulationSet$timeUnit
),
"Concentration" = lloqConcentration,
Expand Down
45 changes: 45 additions & 0 deletions man/getSelectedData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions man/getSelectedRows.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 16c1eec

Please sign in to comment.