From aad5e5aa150009f57f02eb730ddf59d397e58a4b Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 30 Oct 2024 10:55:14 +0200 Subject: [PATCH 1/5] Add check_file_n function. Related to #139 --- NAMESPACE | 1 + R/check_file_n.R | 40 +++++++++++++++++++++++++++ man/check_file_n.Rd | 38 +++++++++++++++++++++++++ tests/testthat/_snaps/check_file_n.md | 10 +++++++ tests/testthat/test-check_file_n.R | 28 +++++++++++++++++++ 5 files changed, 117 insertions(+) create mode 100644 R/check_file_n.R create mode 100644 man/check_file_n.Rd create mode 100644 tests/testthat/_snaps/check_file_n.md create mode 100644 tests/testthat/test-check_file_n.R diff --git a/NAMESPACE b/NAMESPACE index f707b418..f9c45d22 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(check_config_hub_valid) export(check_file_exists) export(check_file_format) export(check_file_location) +export(check_file_n) export(check_file_name) export(check_file_read) export(check_for_errors) diff --git a/R/check_file_n.R b/R/check_file_n.R new file mode 100644 index 00000000..8d21c56a --- /dev/null +++ b/R/check_file_n.R @@ -0,0 +1,40 @@ +#' Check number of files submitted per round does not exceed the allowed number +#' of submissions per team. +#' +#' @inheritParams check_tbl_col_types +#' @param allowed_n integer(1). The maximum number of files allowed per round. +#' @inherit check_tbl_col_types return +#' +#' @export +check_file_n <- function(file_path, hub_path, allowed_n = 1L) { + checkmate::assert_integer(allowed_n, lower = 1L, len = 1L) + file_name <- basename(file_path) + file_name_sans_ext <- fs::path_ext_remove(file_name) + team_dir <- dirname(abs_file_path(file_path, hub_path)) + + existing_files <- fs::dir_ls(team_dir, regex = file_name_sans_ext) |> + fs::path_rel(dirname(team_dir)) |> + setdiff(file_path) # Remove file being validated from check + existing_n <- length(existing_files) + + check <- existing_n < allowed_n + + if (check) { + details <- NULL + } else { + details <- cli::format_inline( + "Should be {.val {allowed_n}} but {cli::qty(existing_n)} pre-existing round + submission file{?s} {.val {existing_files}} found in team directory." + ) + } + + capture_check_cnd( + check = check, + file_path = file_path, + msg_subject = "Number of accepted model output files per round", + msg_verbs = c("met.", "exceeded."), + msg_attribute = NULL, + error = FALSE, + details = details + ) +} diff --git a/man/check_file_n.Rd b/man/check_file_n.Rd new file mode 100644 index 00000000..8cd62658 --- /dev/null +++ b/man/check_file_n.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check_file_n.R +\name{check_file_n} +\alias{check_file_n} +\title{Check number of files submitted per round does not exceed the allowed number +of submissions per team.} +\usage{ +check_file_n(file_path, hub_path, allowed_n = 1L) +} +\arguments{ +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} + +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{} created using functions \code{\link[hubData:s3_bucket]{s3_bucket()}} +or \code{\link[hubData:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{allowed_n}{integer(1). The maximum number of files allowed per round.} +} +\value{ +Depending on whether validation has succeeded, one of: +\itemize{ +\item \verb{} condition class object. +\item \verb{} condition class object. +} + +Returned object also inherits from subclass \verb{}. +} +\description{ +Check number of files submitted per round does not exceed the allowed number +of submissions per team. +} diff --git a/tests/testthat/_snaps/check_file_n.md b/tests/testthat/_snaps/check_file_n.md new file mode 100644 index 00000000..9526d0c9 --- /dev/null +++ b/tests/testthat/_snaps/check_file_n.md @@ -0,0 +1,10 @@ +# check_file_n works + + Code + check_file_n(file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.parquet", + hub_path) + Output + + Error: + ! Number of accepted model output files per round exceeded. Should be 1 but pre-existing round submission file "team1-goodmodel/2022-10-08-team1-goodmodel.csv" found in team directory. + diff --git a/tests/testthat/test-check_file_n.R b/tests/testthat/test-check_file_n.R new file mode 100644 index 00000000..538bd163 --- /dev/null +++ b/tests/testthat/test-check_file_n.R @@ -0,0 +1,28 @@ +test_that("check_file_n works", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.parquet" + check_file_n(file_path, hub_path) + + expect_s3_class( + check_file_n( + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + hub_path + ), + c("check_success", "rlang_message", "message", "condition") + ) + + expect_s3_class( + check_file_n( + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.parquet", + hub_path + ), + c("check_failure", "hub_check", "rlang_error", "error", "condition") + ) + + expect_snapshot( + check_file_n( + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.parquet", + hub_path + ) + ) +}) From 505427e2650bf6f31fe73b6c6544e097d15cd07c Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 30 Oct 2024 11:21:07 +0200 Subject: [PATCH 2/5] Add check_file_n to validate_model_file. Resolves #139 --- R/validate_model_file.R | 7 ++ inst/check_table.csv | 1 + tests/testthat/_snaps/validate_model_file.md | 16 +++- tests/testthat/_snaps/validate_pr.md | 94 ++++++++++++++++++-- tests/testthat/_snaps/validate_submission.md | 64 +++++++++++-- 5 files changed, 165 insertions(+), 17 deletions(-) diff --git a/R/validate_model_file.R b/R/validate_model_file.R index 6a5a1917..cce48acd 100644 --- a/R/validate_model_file.R +++ b/R/validate_model_file.R @@ -85,6 +85,13 @@ validate_model_file <- function(hub_path, file_path, return(checks) } + checks$file_n <- try_check( + check_file_n( + file_path = file_path, + hub_path = hub_path + ), file_path + ) + checks$metadata_exists <- try_check( check_submission_metadata_file_exists( hub_path = hub_path, diff --git a/inst/check_table.csv b/inst/check_table.csv index fffb51c8..bd983014 100644 --- a/inst/check_table.csv +++ b/inst/check_table.csv @@ -6,6 +6,7 @@ file_name,File name valid,TRUE,check_error,validate_model_file,check_file_name,, file_location,File located in correct team directory,FALSE,check_failure,validate_model_file,check_file_location,,FALSE round_id_valid,File round ID is valid hub round IDs,TRUE,check_error,validate_model_file,check_valid_round_id,,FALSE file_format,File format is accepted hub/round format,TRUE,check_error,validate_model_file,check_file_format,,FALSE +file_n,Number of submission files per round per team does not exceed allowed number,FALSE,check_failure,validate_model_file,check_file_n,,FALSE metadata_exists,Model metadata file exists in expected location,FALSE,check_failure,validate_model_file,check_submission_metadata_file_exists,,FALSE file_read,File can be read without errors,TRUE,check_error,validate_model_data,check_file_read,,FALSE valid_round_id_col,Round ID var from config exists in data column names. Skipped if `round_id_from_var` is FALSE in config.,FALSE,check_failure,validate_model_data,check_valid_round_id_col,,FALSE diff --git a/tests/testthat/_snaps/validate_model_file.md b/tests/testthat/_snaps/validate_model_file.md index b39723fc..a8bcc6a8 100644 --- a/tests/testthat/_snaps/validate_model_file.md +++ b/tests/testthat/_snaps/validate_model_file.md @@ -3,7 +3,7 @@ Code str(validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv")) Output - List of 6 + List of 7 $ file_exists :List of 4 ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -34,6 +34,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists:List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -70,6 +76,7 @@ v [file_location]: File directory name matches `model_id` metadata in file name. v [round_id_valid]: `round_id` is valid. v [file_format]: File is accepted hub format. + v [file_n]: Number of accepted model output files per round met. v [metadata_exists]: Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. --- @@ -95,6 +102,7 @@ x [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" v [round_id_valid]: `round_id` is valid. v [file_format]: File is accepted hub format. + v [file_n]: Number of accepted model output files per round met. v [metadata_exists]: Metadata file exists at path 'model-metadata/hub-baseline.yml'. # validate_model_file print method work [ansi] @@ -110,6 +118,7 @@ v [file_location]: File directory name matches `model_id` metadata in file name. v [round_id_valid]: `round_id` is valid. v [file_format]: File is accepted hub format. + v [file_n]: Number of accepted model output files per round met. v [metadata_exists]: Metadata file exists at path model-metadata/team1-goodmodel.yaml. --- @@ -135,6 +144,7 @@ x [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" v [round_id_valid]: `round_id` is valid. v [file_format]: File is accepted hub format. + v [file_n]: Number of accepted model output files per round met. v [metadata_exists]: Metadata file exists at path model-metadata/hub-baseline.yml. # validate_model_file print method work [unicode] @@ -150,6 +160,7 @@ ✔ [file_location]: File directory name matches `model_id` metadata in file name. ✔ [round_id_valid]: `round_id` is valid. ✔ [file_format]: File is accepted hub format. + ✔ [file_n]: Number of accepted model output files per round met. ✔ [metadata_exists]: Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. --- @@ -175,6 +186,7 @@ ✖ [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" ✔ [round_id_valid]: `round_id` is valid. ✔ [file_format]: File is accepted hub format. + ✔ [file_n]: Number of accepted model output files per round met. ✔ [metadata_exists]: Metadata file exists at path 'model-metadata/hub-baseline.yml'. # validate_model_file print method work [fancy] @@ -190,6 +202,7 @@ ✔ [file_location]: File directory name matches `model_id` metadata in file name. ✔ [round_id_valid]: `round_id` is valid. ✔ [file_format]: File is accepted hub format. + ✔ [file_n]: Number of accepted model output files per round met. ✔ [metadata_exists]: Metadata file exists at path model-metadata/team1-goodmodel.yaml. --- @@ -215,5 +228,6 @@ ✖ [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel" ✔ [round_id_valid]: `round_id` is valid. ✔ [file_format]: File is accepted hub format. + ✔ [file_n]: Number of accepted model output files per round met. ✔ [metadata_exists]: Metadata file exists at path model-metadata/hub-baseline.yml. diff --git a/tests/testthat/_snaps/validate_pr.md b/tests/testthat/_snaps/validate_pr.md index 04a0a142..8ded696d 100644 --- a/tests/testthat/_snaps/validate_pr.md +++ b/tests/testthat/_snaps/validate_pr.md @@ -3,7 +3,7 @@ Code str(checks) Output - List of 23 + List of 24 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "valid_sb_hub" @@ -40,6 +40,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" @@ -159,7 +165,7 @@ Code str(invalid_checks) Output - Classes 'hub_validations', 'list' hidden list of 12 + Classes 'hub_validations', 'list' hidden list of 13 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "invalid_sb_hub" @@ -196,6 +202,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : 'fs_path' chr "hub-baseline/2022-10-22-hub-baseline.parquet" @@ -240,7 +252,7 @@ Code str(mod_checks_error) Output - List of 48 + List of 50 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "mod_del_hub" @@ -301,6 +313,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" @@ -442,6 +460,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n_1 :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists_1 :List of 6 ..$ message : chr "Metadata file does not exist at path 'model-metadata/team1-goodmodel.yml' or\n "| __truncated__ ..$ trace : NULL @@ -563,7 +587,7 @@ Code str(mod_checks_warn) Output - List of 48 + List of 50 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "mod_del_hub" @@ -624,6 +648,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" @@ -765,6 +795,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n_1 :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists_1 :List of 6 ..$ message : chr "Metadata file does not exist at path 'model-metadata/team1-goodmodel.yml' or\n "| __truncated__ ..$ trace : NULL @@ -886,7 +922,7 @@ Code str(mod_checks_message) Output - List of 48 + List of 50 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "mod_del_hub" @@ -941,6 +977,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" @@ -1082,6 +1124,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n_1 :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists_1 :List of 6 ..$ message : chr "Metadata file does not exist at path 'model-metadata/team1-goodmodel.yml' or\n "| __truncated__ ..$ trace : NULL @@ -1203,7 +1251,7 @@ Code str(mod_checks_none) Output - List of 45 + List of 47 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "mod_del_hub" @@ -1240,6 +1288,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" @@ -1381,6 +1435,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n_1 :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists_1 :List of 6 ..$ message : chr "Metadata file does not exist at path 'model-metadata/team1-goodmodel.yml' or\n "| __truncated__ ..$ trace : NULL @@ -1502,7 +1562,7 @@ Code str(mod_checks_in_window) Output - List of 47 + List of 49 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "mod_del_hub" @@ -1555,6 +1615,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : 'fs_path' chr "hub-baseline/2022-10-08-hub-baseline.csv" @@ -1696,6 +1762,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n_1 :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists_1 :List of 6 ..$ message : chr "Metadata file does not exist at path 'model-metadata/team1-goodmodel.yml' or\n "| __truncated__ ..$ trace : NULL @@ -1856,7 +1928,7 @@ Code str(checks) Output - List of 19 + List of 20 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "valid_sb_hub-old" @@ -1893,6 +1965,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " ..$ where : 'fs_path' chr "team1-goodmodel/2022-10-22-team1-goodmodel.csv" diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md index ca6c6e29..2dcf19f6 100644 --- a/tests/testthat/_snaps/validate_submission.md +++ b/tests/testthat/_snaps/validate_submission.md @@ -4,7 +4,7 @@ str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", skip_submit_window_check = TRUE, skip_check_config = TRUE)) Output - List of 18 + List of 19 $ file_exists :List of 4 ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -35,6 +35,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -146,7 +152,7 @@ str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-hub-baseline.csv", skip_submit_window_check = TRUE, skip_check_config = TRUE)) Output - Classes 'hub_validations', 'list' hidden list of 10 + Classes 'hub_validations', 'list' hidden list of 11 $ file_exists :List of 4 ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-15-hub-baseline.csv'. \n " ..$ where : chr "team1-goodmodel/2022-10-15-hub-baseline.csv" @@ -179,6 +185,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team1-goodmodel/2022-10-15-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : chr "team1-goodmodel/2022-10-15-hub-baseline.csv" @@ -219,7 +231,7 @@ round_id_col = "random_col", skip_submit_window_check = TRUE, skip_check_config = TRUE)) Output - Classes 'hub_validations', 'list' hidden list of 9 + Classes 'hub_validations', 'list' hidden list of 10 $ file_exists :List of 4 ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -250,6 +262,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -285,7 +303,7 @@ str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", skip_submit_window_check = TRUE)) Output - List of 19 + List of 20 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "simple" @@ -322,6 +340,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" @@ -445,7 +469,7 @@ str(validate_submission(hub_path = test_path("testdata/hub"), file_path = "hub-baseline/2023-04-24-hub-baseline.csv", skip_submit_window_check = TRUE)) Output - List of 19 + List of 20 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "hub" @@ -482,6 +506,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/hub-baseline.yml'. \n " ..$ where : chr "hub-baseline/2023-04-24-hub-baseline.csv" @@ -578,7 +608,7 @@ str(validate_submission(hub_path = test_path("testdata/hub-nul"), file_path = "team-model/2023-11-26-team-model.parquet", skip_submit_window_check = TRUE)) Output - List of 19 + List of 20 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "hub-nul" @@ -615,6 +645,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team-model/2023-11-26-team-model.parquet" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team-model.yaml'. \n " ..$ where : chr "team-model/2023-11-26-team-model.parquet" @@ -711,7 +747,7 @@ str(validate_submission(hub_path = test_path("testdata/hub-nul"), file_path = "team-model/2023-11-19-team-model.parquet", skip_submit_window_check = TRUE)) Output - List of 19 + List of 20 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "hub-nul" @@ -748,6 +784,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "team-model/2023-11-19-team-model.parquet" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/team-model.yaml'. \n " ..$ where : chr "team-model/2023-11-19-team-model.parquet" @@ -957,7 +999,7 @@ file_path = "flu-base/2022-10-22-flu-base.csv", skip_submit_window_check = TRUE, derived_task_ids = "target_end_date")) Output - List of 24 + List of 25 $ valid_config :List of 4 ..$ message : chr "All hub config files are valid. \n " ..$ where : chr "samples" @@ -994,6 +1036,12 @@ ..$ call : chr "check_file_format" ..$ use_cli_format: logi TRUE ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 4 + ..$ message : chr "Number of accepted model output files per round met. \n " + ..$ where : chr "flu-base/2022-10-22-flu-base.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... $ metadata_exists :List of 4 ..$ message : chr "Metadata file exists at path 'model-metadata/flu-base.yml'. \n " ..$ where : chr "flu-base/2022-10-22-flu-base.csv" From 78dec4c8ceb7e79f66b1b26a262c96d18e255560 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 30 Oct 2024 11:37:44 +0200 Subject: [PATCH 3/5] Add test for duplicate file in the same round --- tests/testthat/_snaps/validate_submission.md | 82 ++++++++++++++++++++ tests/testthat/test-validate_submission.R | 37 +++++++++ 2 files changed, 119 insertions(+) diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md index 2dcf19f6..cb8fceaf 100644 --- a/tests/testthat/_snaps/validate_submission.md +++ b/tests/testthat/_snaps/validate_submission.md @@ -1214,3 +1214,85 @@ Task ID combinations of non compound task id values consistent across modeling task samples. +# validate_submission returns check_failure when duplicate files per round exist + + Code + str(dup_model_out_val) + Output + Classes 'hub_validations', 'list' hidden list of 10 + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_n :List of 6 + ..$ message : chr "Number of accepted model output files per round exceeded. \n Should be 1 but pre-existing round\n submissi"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_n" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_error" "error" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 6 + ..$ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"location\", \"ref"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_error" "error" ... + $ unique_round_id :List of 6 + ..$ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"location\", \"ref"| __truncated__ + ..$ trace : NULL + ..$ parent : NULL + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + +--- + + Code + dup_model_out_val[["file_n"]] + Output + + Error: + ! Number of accepted model output files per round exceeded. Should be 1 but pre-existing round submission file "team1-goodmodel/2022-10-08-team1-goodmodel.parquet" found in team directory. + diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R index aac41425..504bc49f 100644 --- a/tests/testthat/test-validate_submission.R +++ b/tests/testthat/test-validate_submission.R @@ -380,3 +380,40 @@ test_that("Ignoring derived_task_ids in validate_submission works", { )] ) }) + +test_that("validate_submission returns check_failure when duplicate files per round exist", { + skip_if_offline() + + copy_path <- withr::local_tempdir() + fs::dir_copy( + system.file("testhubs/simple", package = "hubValidations"), + copy_path + ) + hub_path <- fs::path(copy_path, "simple") + + # Create duplicate parquet file + read_model_out_file( + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + hub_path = hub_path, + coerce_types = "hub" + ) |> + arrow::write_parquet( + fs::path( + hub_path, + "model-output/team1-goodmodel/2022-10-08-team1-goodmodel.parquet" + ) + ) + + dup_model_out_val <- validate_submission( + hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + skip_submit_window_check = TRUE, + skip_check_config = TRUE + ) + expect_snapshot( + str(dup_model_out_val) + ) + expect_snapshot( + dup_model_out_val[["file_n"]] + ) +}) From 166b6d85a355256bb0e546de1165073ecce10429 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 30 Oct 2024 11:38:18 +0200 Subject: [PATCH 4/5] Document --- man/validate_model_file.Rd | 7 +++++++ man/validate_pr.Rd | 7 +++++++ man/validate_submission.Rd | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/man/validate_model_file.Rd b/man/validate_model_file.Rd index 8ddd3f28..7d5c9527 100644 --- a/man/validate_model_file.Rd +++ b/man/validate_model_file.Rd @@ -82,6 +82,13 @@ Details of checks performed by \code{validate_model_file()}\if{html}{\out{ check_error + + + + + + + diff --git a/man/validate_pr.Rd b/man/validate_pr.Rd index c5c2a0cc..2ef3c77a 100644 --- a/man/validate_pr.Rd +++ b/man/validate_pr.Rd @@ -181,6 +181,13 @@ Details of checks performed by \code{validate_submission()}\if{html}{\out{
file_n Number of submission files per round per team does not exceed allowed number FALSE check_failure
metadata_exists Model metadata file exists in expected location
check_error + + + + + + + diff --git a/man/validate_submission.Rd b/man/validate_submission.Rd index cc3b0702..0b8dfc99 100644 --- a/man/validate_submission.Rd +++ b/man/validate_submission.Rd @@ -145,6 +145,13 @@ Details of checks performed by \code{validate_submission()}\if{html}{\out{
file_n Number of submission files per round per team does not exceed allowed number FALSE check_failure
metadata_exists Model metadata file exists in expected location
check_error + + + + + + + From 835acde04166edcc3d49798b28114c6525f6df8f Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Wed, 30 Oct 2024 11:42:51 +0200 Subject: [PATCH 5/5] Update NEWS.md --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index e6c6445c..41a77868 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # hubValidations (development version) +* Downgrade result of missing model metadata file check from `check_error` to `check_failure` and suppress early return in case of check failure in `validate_model_file()` (#138). +* Add `check_file_n()` function to validate that the number of files submitted per round does not exceed the allowed number of submissions per team (#139). + # hubValidations 0.7.1 * Updated documentation for custom validations:
file_n Number of submission files per round per team does not exceed allowed number FALSE check_failure
metadata_exists Model metadata file exists in expected location