Skip to content

Commit

Permalink
Merge pull request #124 from hubverse-org/ak/fix-required-vals-bug/123
Browse files Browse the repository at this point in the history
Fix required vals check bug
  • Loading branch information
annakrystalli authored Oct 3, 2024
2 parents 85d5825 + ccb2a2e commit 33c05d5
Show file tree
Hide file tree
Showing 12 changed files with 393 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hubValidations
Title: Testing framework for hubverse hub validations
Version: 0.6.2.9000
Version: 0.6.2.9001
Authors@R: c(
person(
given = "Anna",
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# hubValidations (development version)

* Added function `create_custom_check()` for creating custom validation check function files from templates (#121).
* Fixed bug in `check_tbl_values_required()` causing required missing values to not be identified correctly when all output types were optional (#123)

# hubValidations 0.6.2

Expand Down
33 changes: 31 additions & 2 deletions R/expand_model_out_grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
#' character which can be faster when large expanded grids are expected.
#' If `required_vals_only = TRUE`, values are limited to the combinations of required
#' values only.
#'
#' Note that if `required_vals_only = TRUE` and an optional output type is
#' requested through `output_types`, a zero row grid will be returned.
#' If all output types are requested however (i.e. when `output_types = NULL`) and
#' they are all optional, a grid of required task ID values only will be returned.
#' @inheritParams hubData::coerce_to_hub_schema
#' @details
#' When a round is set to `round_id_from_variable: true`,
Expand Down Expand Up @@ -166,6 +171,9 @@ expand_model_out_grid <- function(config_tasks,
config_tasks, round_id
)
round_config <- get_round_config(config_tasks, round_id)
# Create a logical variable to control what is returned by expand_output_type_grid.
# See not in fn for details.
all_output_types <- is.null(output_types) # nolint: object_usage_linter

task_id_l <- purrr::map(
round_config[["model_tasks"]],
Expand Down Expand Up @@ -211,7 +219,8 @@ expand_model_out_grid <- function(config_tasks,
task_id_l, output_type_l,
~ expand_output_type_grid(
task_id_values = .x,
output_type_values = .y
output_type_values = .y,
all_output_types = all_output_types
)
)

Expand Down Expand Up @@ -245,7 +254,27 @@ process_grid_inputs <- function(x, required_vals_only = FALSE) {
# Function that expands modeling task level lists of task IDs and output type
# values into a grid and combines them into a single tibble.
expand_output_type_grid <- function(task_id_values,
output_type_values) {
output_type_values,
all_output_types = TRUE) {
# Return a grid of only task IDs if no output type values are provided but
# only if a specific output type subset is not requested.
# Otherwise return a zero row grid.
# No output type values can either be the result of all optional output types
# when required values only are requested or as a result of output type sub-setting.
# When requesting required values only for an entire rounds (i.e. all output types),
# we want required task ID values to be returned, even is all output types are
# optional. However, it does not make sense to return required values for an
# optional output type when a user is specifically requesting required values
# for an output type. In that situation it's more appropriate to return a zero row grid.
if (length(output_type_values) == 0 && all_output_types) {
return(
expand.grid(
purrr::compact(task_id_values),
stringsAsFactors = FALSE
)
)
}

purrr::imap(
output_type_values,
~ c(task_id_values, list(
Expand Down
5 changes: 5 additions & 0 deletions man/expand_model_out_grid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions tests/testthat/_snaps/check_tbl_values_required.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,46 @@
Message:
Required task ID/output type/output type ID combinations all present.

# (#123) check_tbl_values_required works with all optional output types

Code
opt_output_type_ids_result
Output
<error/check_failure>
Error:
! Required task ID/output type/output type ID combinations missing. See `missing` attribute for details.

---

Code
opt_output_type_ids_result$missing
Output
# A tibble: 168 x 6
nowcast_date target_date clade location output_type output_type_id
<date> <date> <chr> <chr> <chr> <chr>
1 2024-10-02 2024-09-01 24A AL sample <NA>
2 2024-10-02 2024-09-01 24B AL sample <NA>
3 2024-10-02 2024-09-01 24A CA sample <NA>
4 2024-10-02 2024-09-01 24B CA sample <NA>
5 2024-10-02 2024-09-02 24A AL sample <NA>
6 2024-10-02 2024-09-02 24B AL sample <NA>
7 2024-10-02 2024-09-02 24A CA sample <NA>
8 2024-10-02 2024-09-02 24B CA sample <NA>
9 2024-10-02 2024-09-03 24A AL sample <NA>
10 2024-10-02 2024-09-03 24B AL sample <NA>
# i 158 more rows

---

Code
check_for_errors(validate_submission(hub_path, file_path))
Message
-- 2024-10-02-UMass-HMLR.parquet ----
x [req_vals]: Required task ID/output type/output type ID combinations missing. See `missing` attribute for details.
Condition
Error in `check_for_errors()`:
!
The validation checks produced some failures/errors reported above.

30 changes: 30 additions & 0 deletions tests/testthat/test-check_tbl_values_required.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,33 @@ test_that("Ignoring derived_task_ids in check_tbl_values_required works", {
)
)
})

test_that("(#123) check_tbl_values_required works with all optional output types", {
skip_if_offline()

hub_path <- test_path("testdata", "hub-now")
file_path <- "UMass-HMLR/2024-10-02-UMass-HMLR.parquet"
round_id <- "2024-10-02"
tbl <- read_model_out_file(
hub_path = hub_path, file_path = file_path,
coerce_types = "chr"
)

opt_output_type_ids_result <- check_tbl_values_required(
tbl, round_id, file_path, hub_path
)
# Check output correct
expect_snapshot(opt_output_type_ids_result)
# Missing output structure correct
expect_snapshot(opt_output_type_ids_result$missing)
# Missing clades identified correctly
expect_equal(
unique(opt_output_type_ids_result$missing$clade),
c("24A", "24B")
)
# Ensure that req_vals check is the only one that fails
expect_snapshot(
check_for_errors(validate_submission(hub_path, file_path)),
error = TRUE
)
})
28 changes: 28 additions & 0 deletions tests/testthat/test-expand_model_out_grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -525,3 +525,31 @@ test_that("expand_model_out_grid errors correctly", {
error = TRUE
)
})

test_that("(#123) expand_output_type_grid() returns expected outputs with optional output_type_id", {
tasks <- list(
nowcast_date = "2024-10-02",
target_date = NULL,
clade = c("24A", "24B", "recombinant", "other"),
other_task = 1:2,
location = NULL
)
# If specific output type subset is requested
i_have_no_rows <- expand_output_type_grid(
task_id_values = tasks,
output_type_values = list(),
all_output_types = FALSE
)
expect_equal(nrow(i_have_no_rows), 0)
expect_equal(ncol(i_have_no_rows), 0)

# When no specific output_type subset is requested
i_have_eight_rows <- expand_output_type_grid(
task_id_values = tasks,
output_type_values = list(),
all_output_types = TRUE
)
expect_equal(nrow(i_have_eight_rows), 8)
expect_equal(ncol(i_have_eight_rows), 3)

})
24 changes: 24 additions & 0 deletions tests/testthat/testdata/hub-now/hub-config/admin.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"schema_version": "https://raw.githubusercontent.com/Infectious-Disease-Modeling-Hubs/schemas/main/v3.0.1/admin-schema.json",
"name": "SARS-CoV-2 Variant Nowcast Hub",
"maintainer": "jbloggs @ UMass-Amherst",
"contact": {
"name": "Joe blogss",
"email": "[email protected]"
},
"repository": {
"host": "github",
"owner": "jbloggs",
"name": "variant-nowcast-hub"
},
"file_format": ["parquet"],
"timezone": "US/Eastern",
"cloud": {
"enabled": false,
"host": {
"name": "aws",
"storage_service": "s3",
"storage_location": "variant-nowcast-hub"
}
}
}
132 changes: 132 additions & 0 deletions tests/testthat/testdata/hub-now/hub-config/model-metadata-schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Schema for SARS-CoV-2 Variant Nowcast Hub model metadata",
"description": "This is the schema for model metadata files, please refer to https://github.com/reichlab/variant-nowcast-hub/blob/main/README.md#model-metadata for more information.",
"type": "object",
"properties": {
"team_name": {
"description": "The name of the team submitting the model",
"type": "string",
"maxLength": 50
},
"team_abbr": {
"description": "Abbreviated name of the team submitting the model",
"type": "string",
"pattern": "^[a-zA-Z0-9_]+$",
"maxLength": 16
},
"model_name": {
"description": "The name of the model",
"type": "string",
"maxLength": 50
},
"model_abbr": {
"description": "Abbreviated name of the model",
"type": "string",
"pattern": "^[a-zA-Z0-9_]+$",
"maxLength": 16
},
"model_version": {
"description": "Identifier of the version of the model, if multiple versions are present.",
"type": "string"
},
"model_contributors": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"affiliation": {
"type": "string"
},
"email": {
"type": "string",
"format": "email"
},
"orcid": {
"type": "string",
"pattern": "^\\d{4}\\-\\d{4}\\-\\d{4}\\-[\\dX]{4}$"
}
},
"additionalProperties": false,
"required": [
"name",
"affiliation",
"email"
]
}
},
"website_url": {
"description": "Public facing website for the model",
"type": "string",
"format": "uri"
},
"repo_url": {
"description": "Repository containing code for the model",
"type": "string",
"format": "uri"
},
"license": {
"description": "License for use of model output data",
"type": "string",
"enum": [
"CC0-1.0",
"CC-BY-4.0",
"CC-BY_SA-4.0",
"PPDL",
"ODC-by",
"ODbL",
"OGL-3.0"
]
},
"citation": {
"description": "One or more citations for this model",
"type": "string",
"examples": ["Gibson GC , Reich NG , Sheldon D. Real-time mechanistic bayesian forecasts of Covid-19 mortality. medRxiv. 2020. https://doi.org/10.1101/2020.12.22.20248736"]
},
"team_funding": {
"description": "Any information about funding source for the team or members of the team.",
"type": "string",
"examples": ["National Institutes of General Medical Sciences (R01GM123456). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS."]
},
"methods": {
"description": "A summary of the methods used by this model (5000 character limit). Among other details, this should include details about the joint dependence structure of the model, and across what variables the model draws joint distributions, e.g., across horizons but not horizons and locations.",
"type": "string",
"maxLength": 5000
},
"methods_url": {
"description": "A link to a complete write-up of the model specification, with mathematical details. This could be a peer-reviewed article, preprint, or an unpublished PDF or webpage stored at a public url somewhere.",
"type": "string",
"format": "uri"
},
"data_sources": {
"description": "List or description of data inputs used by the model. For example: NextStrain, GISAID for sequences outside of the U.S., wastewater variant proportions, etc....",
"type": "string"
},
"ensemble_of_models": {
"description": "Indicator for whether this model is an ensemble of any separate component models",
"type": "boolean"
},
"ensemble_of_hub_models": {
"description": "Indicator for whether this model is an ensemble specifically of other models submitted to this Hub",
"type": "boolean"
}
},
"additionalProperties": false,
"required": [
"team_name",
"team_abbr",
"model_name",
"model_abbr",
"model_contributors",
"license",
"team_funding",
"methods",
"methods_url",
"data_sources",
"ensemble_of_models",
"ensemble_of_hub_models"
]
}
Loading

0 comments on commit 33c05d5

Please sign in to comment.