Skip to content

Commit

Permalink
Merge pull request #145 from hubverse-org/ak/detect-dup-model-outpus-…
Browse files Browse the repository at this point in the history
…sans-ext/139
  • Loading branch information
annakrystalli authored Oct 30, 2024
2 parents b40c5bd + 835acde commit f0b1406
Show file tree
Hide file tree
Showing 15 changed files with 425 additions and 17 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(check_config_hub_valid)
export(check_file_exists)
export(check_file_format)
export(check_file_location)
export(check_file_n)
export(check_file_name)
export(check_file_read)
export(check_for_errors)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# hubValidations (development version)

* Downgrade result of missing model metadata file check from `check_error` to `check_failure` and suppress early return in case of check failure in `validate_model_file()` (#138).
* Add `check_file_n()` function to validate that the number of files submitted per round does not exceed the allowed number of submissions per team (#139).

# hubValidations 0.7.1

* Updated documentation for custom validations:
Expand Down
40 changes: 40 additions & 0 deletions R/check_file_n.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Check number of files submitted per round does not exceed the allowed number
#' of submissions per team.
#'
#' Lists the directory containing the file being validated and counts the
#' other entries whose file name starts with the same extension-less file name
#' followed by a `.` (i.e. entries that differ from the validated file only in
#' their extension). The check passes when that count is smaller than
#' `allowed_n`.
#'
#' @inheritParams check_tbl_col_types
#' @param allowed_n integer(1). The maximum number of files allowed per round.
#' @inherit check_tbl_col_types return
#'
#' @export
check_file_n <- function(file_path, hub_path, allowed_n = 1L) {
  checkmate::assert_integer(allowed_n, lower = 1L, len = 1L)
  file_name_sans_ext <- fs::path_ext_remove(basename(file_path))
  team_dir <- dirname(abs_file_path(file_path, hub_path))

  # Compare against file names only, literally and anchored at the start.
  # Using the stem as a regular expression on the listed paths would also
  # match files that merely contain the stem (e.g. "<stem>2.csv"), paths whose
  # directory part contains it, and would interpret characters such as "." or
  # "+" in the stem as regex syntax.
  # NOTE(review): only the last extension is stripped from `file_path`, so a
  # validated file with a compound extension is compared using a stem that
  # still contains the inner extension -- confirm whether that needs handling.
  team_files <- fs::dir_ls(team_dir)
  has_same_stem <- startsWith(
    basename(team_files),
    paste0(file_name_sans_ext, ".")
  )

  existing_files <- team_files[has_same_stem] |>
    fs::path_rel(dirname(team_dir)) |>
    setdiff(file_path) # Remove file being validated from check
  existing_n <- length(existing_files)

  # The file being validated counts as one submission, so the number of other
  # matching files must stay strictly below the allowance.
  check <- existing_n < allowed_n

  if (check) {
    details <- NULL
  } else {
    details <- cli::format_inline(
      "Should be {.val {allowed_n}} but {cli::qty(existing_n)} pre-existing round
submission file{?s} {.val {existing_files}} found in team directory."
    )
  }

  capture_check_cnd(
    check = check,
    file_path = file_path,
    msg_subject = "Number of accepted model output files per round",
    msg_verbs = c("met.", "exceeded."),
    msg_attribute = NULL,
    error = FALSE,
    details = details
  )
}
7 changes: 7 additions & 0 deletions R/validate_model_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ validate_model_file <- function(hub_path, file_path,
return(checks)
}

checks$file_n <- try_check(
check_file_n(
file_path = file_path,
hub_path = hub_path
), file_path
)

checks$metadata_exists <- try_check(
check_submission_metadata_file_exists(
hub_path = hub_path,
Expand Down
1 change: 1 addition & 0 deletions inst/check_table.csv
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ file_name,File name valid,TRUE,check_error,validate_model_file,check_file_name,,
file_location,File located in correct team directory,FALSE,check_failure,validate_model_file,check_file_location,,FALSE
round_id_valid,File round ID is a valid hub round ID,TRUE,check_error,validate_model_file,check_valid_round_id,,FALSE
file_format,File format is accepted hub/round format,TRUE,check_error,validate_model_file,check_file_format,,FALSE
file_n,Number of submission files per round per team does not exceed allowed number,FALSE,check_failure,validate_model_file,check_file_n,,FALSE
metadata_exists,Model metadata file exists in expected location,FALSE,check_failure,validate_model_file,check_submission_metadata_file_exists,,FALSE
file_read,File can be read without errors,TRUE,check_error,validate_model_data,check_file_read,,FALSE
valid_round_id_col,Round ID var from config exists in data column names. Skipped if `round_id_from_var` is FALSE in config.,FALSE,check_failure,validate_model_data,check_valid_round_id_col,,FALSE
Expand Down
38 changes: 38 additions & 0 deletions man/check_file_n.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/validate_model_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/validate_pr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/validate_submission.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions tests/testthat/_snaps/check_file_n.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# check_file_n works

Code
check_file_n(file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.parquet",
hub_path)
Output
<error/check_failure>
Error:
! Number of accepted model output files per round exceeded. Should be 1 but pre-existing round submission file "team1-goodmodel/2022-10-08-team1-goodmodel.csv" found in team directory.

16 changes: 15 additions & 1 deletion tests/testthat/_snaps/validate_model_file.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Code
str(validate_model_file(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv"))
Output
List of 6
List of 7
$ file_exists :List of 4
..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
Expand Down Expand Up @@ -34,6 +34,12 @@
..$ call : chr "check_file_format"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_n :List of 4
..$ message : chr "Number of accepted model output files per round met. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_n"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ metadata_exists:List of 4
..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
Expand Down Expand Up @@ -70,6 +76,7 @@
v [file_location]: File directory name matches `model_id` metadata in file name.
v [round_id_valid]: `round_id` is valid.
v [file_format]: File is accepted hub format.
v [file_n]: Number of accepted model output files per round met.
v [metadata_exists]: Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'.

---
Expand All @@ -95,6 +102,7 @@
x [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel"
v [round_id_valid]: `round_id` is valid.
v [file_format]: File is accepted hub format.
v [file_n]: Number of accepted model output files per round met.
v [metadata_exists]: Metadata file exists at path 'model-metadata/hub-baseline.yml'.

# validate_model_file print method work [ansi]
Expand All @@ -110,6 +118,7 @@
v [file_location]: File directory name matches `model_id` metadata in file name.
v [round_id_valid]: `round_id` is valid.
v [file_format]: File is accepted hub format.
v [file_n]: Number of accepted model output files per round met.
v [metadata_exists]: Metadata file exists at path model-metadata/team1-goodmodel.yaml.

---
Expand All @@ -135,6 +144,7 @@
x [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel"
v [round_id_valid]: `round_id` is valid.
v [file_format]: File is accepted hub format.
v [file_n]: Number of accepted model output files per round met.
v [metadata_exists]: Metadata file exists at path model-metadata/hub-baseline.yml.

# validate_model_file print method work [unicode]
Expand All @@ -150,6 +160,7 @@
✔ [file_location]: File directory name matches `model_id` metadata in file name.
✔ [round_id_valid]: `round_id` is valid.
✔ [file_format]: File is accepted hub format.
✔ [file_n]: Number of accepted model output files per round met.
✔ [metadata_exists]: Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'.

---
Expand All @@ -175,6 +186,7 @@
✖ [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel"
✔ [round_id_valid]: `round_id` is valid.
✔ [file_format]: File is accepted hub format.
✔ [file_n]: Number of accepted model output files per round met.
✔ [metadata_exists]: Metadata file exists at path 'model-metadata/hub-baseline.yml'.

# validate_model_file print method work [fancy]
Expand All @@ -190,6 +202,7 @@
✔ [file_location]: File directory name matches `model_id` metadata in file name.
✔ [round_id_valid]: `round_id` is valid.
✔ [file_format]: File is accepted hub format.
✔ [file_n]: Number of accepted model output files per round met.
✔ [metadata_exists]: Metadata file exists at path model-metadata/team1-goodmodel.yaml.

---
Expand All @@ -215,5 +228,6 @@
✖ [file_location]: File directory name must match `model_id` metadata in file name. File should be submitted to directory "hub-baseline" not "team1-goodmodel"
✔ [round_id_valid]: `round_id` is valid.
✔ [file_format]: File is accepted hub format.
✔ [file_n]: Number of accepted model output files per round met.
✔ [metadata_exists]: Metadata file exists at path model-metadata/hub-baseline.yml.

Loading

0 comments on commit f0b1406

Please sign in to comment.