Skip to content

Commit

Permalink
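Fix cleanup error: skip failed (NA) cross-validation results when deleting temporary files, and end printed error messages with a newline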
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffersonfparil committed Dec 22, 2024
1 parent 327ea2c commit 5b23377
Showing 1 changed file with 59 additions and 35 deletions.
94 changes: 59 additions & 35 deletions R/cross_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"Error in cross_validation::fn_cv_1(...). ",
"Input data (list_merged) is an error type."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if ((i < 1) | (i > nrow(df_params))) {
error = methods::new("gpError",
Expand All @@ -144,7 +144,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"Error in cross_validation::fn_cv_1(...). ",
"The index (i) of df_params is beyond the number of rows in df_params (may also be less than 1)."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (sum((colnames(df_params) == c("rep", "fold", "model"))) != 3) {
error = methods::new("gpError",
Expand All @@ -154,7 +154,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"The data frame of parameters is incorrect. We are expecting the following columns in order: 'rep', 'fold', and 'model'.",
"The supplied data frame has the following columns or fields: ", paste(colnames(df_params), collapse=", ")
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (nrow(mat_idx_shuffle) != nrow(list_merged$G)) {
error = methods::new("gpError",
Expand All @@ -165,7 +165,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"does not match the number of samples in the input genotype and phenotype (and covariate) data (",
nrow(list_merged$G) , " rows)."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (ncol(mat_idx_shuffle) != max(df_params$rep)) {
error = methods::new("gpError",
Expand All @@ -175,7 +175,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"The number of columns in the shuffling matrix (mat_idx_shuffle; ", ncol(mat_idx_shuffle), " columns) ",
"does not match the replications requested (", max(df_params$rep) , " replications)."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (length(vec_set_partition_groupings) != nrow(list_merged$G)) {
error = methods::new("gpError",
Expand All @@ -186,7 +186,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
length(vec_set_partition_groupings), " elements) does not match the number of samples in ",
"the input genotype and phenotype (and covariate) data (", nrow(list_merged$G) , " rows)."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (sum(range(vec_set_partition_groupings) == range(df_params$fold)) != 2) {
error = methods::new("gpError",
Expand All @@ -198,7 +198,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"does not match the number of folds requested (fold ", min(df_params$fold), " to fold ",
max(df_params$fold), ")."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define prefix of intermediate output files
if ((prefix_tmp == "") | is.na(prefix_tmp) | is.null(prefix_tmp)) {
Expand Down Expand Up @@ -233,7 +233,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
"Error in cross_validation::fn_cv_1(...). ",
"Unable to fit the model, ", model, " and/or assess genomic prediction accuracy."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### One-liner data frame of the prediction performance metrics
df_metrics = data.frame(
Expand Down Expand Up @@ -380,7 +380,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
"Error in cross_validation::fn_cross_validation_preparation(...). ",
"Input data (list_merged) is an error type."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (cv_type == 1) {
###############################
Expand All @@ -401,7 +401,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
"Error in cross_validation::fn_cross_validation_preparation(...). ",
"The size of the data set is too small, n= ", n, "."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (n_folds < 2) {
error = methods::new("gpError",
Expand All @@ -410,7 +410,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
"Error in cross_validation::fn_cross_validation_preparation(...). ",
"The size of the data set is too small for at least 2-fold cross-validation, n= ", n, "."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
vec_set_partition_groupings = rep(1:n_folds, each=set_size)
if (length(vec_set_partition_groupings) < n) {
Expand Down Expand Up @@ -451,7 +451,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
"Cannot perform pairwise-population cross-validation (cv_type=2) ",
"because the number of populations (", n_folds, " populations) in the data set is not equal to 2."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### No shuffling needed as cross-validation is not replicated
mat_idx_shuffle = matrix(1:n, ncol=1)
Expand Down Expand Up @@ -481,7 +481,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
"Cannot perform leave-one-population-out cross-validation (cv_type=3) ",
"because there is only one population in the data set."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### No shuffling needed as cross-validation is not replicated
mat_idx_shuffle = matrix(1:n, ncol=1)
Expand All @@ -506,7 +506,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
" --> '2' for pairwise-population cross-validation, e.g. training on population A and validation on population B. ",
" --> '3' for leave-one-population-out cross-validation, e.g. training on populations 1 to 9 and validation on population 10."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Memory allocation error handling
if (methods::is(list_mem, "gpError")) {
Expand All @@ -517,7 +517,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
"Failed to estimate memory allocation requirements for parallel computations ",
"and the maximum number of threads which can be used to avoid out-of-memory (OOM) error."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Print the full list of cross-validation set, replication and model combinations
if (verbose) {
Expand Down Expand Up @@ -655,7 +655,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
"Error in cross_validation::fn_cross_validation_within_population(...). ",
"Input data (list_merged) is an error type."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the output directory
if (!is.null(dir_output)) {
Expand All @@ -675,7 +675,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
"Unable to create the output directory: ", dir_output, ". ",
"Please check your permissions to write into that directory."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Determine the number of populations
vec_populations = sort(unique(list_merged$list_pheno$pop))
Expand All @@ -693,7 +693,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
"Error in cross_validation::fn_cross_validation_within_population(...). ",
"Failed to subset the data set."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
list_cv_params = fn_cross_validation_preparation(
Expand All @@ -711,7 +711,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
"Error in cross_validation::fn_cross_validation_within_population(...). ",
"Failed to define the cross-validation parameters."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (list_cv_params$list_mem$n_threads <= 1) {
if (verbose) {
Expand All @@ -735,7 +735,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
mc.cores=min(c(n_threads, list_cv_params$n_threads)))
for (idx in 1:length(list_list_perf)) {
if (methods::is(list_list_perf[[idx]], "gpError")) {
cat(list_list_perf[[idx]]@message)
cat(paste0(list_list_perf[[idx]]@message, "\n"))
list_list_perf[[idx]] = NA
}
}
Expand Down Expand Up @@ -794,6 +794,12 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
vec_fname_within_Rds = c(vec_fname_within_Rds, fname_within_Rds)
### Clean-up temporary files generated by fn_cv_1 in parallel
for (list_perf in list_list_perf) {
if (is.na(utils::head(list_perf[[1]], n=1)[1])) {
next
}
if (is.na(utils::head(list_perf$df_metrics, n=1)[1]) | is.na(utils::head(list_perf$df_y_validation, n=1)[1])) {
next
}
unlink(list_perf$fname_metrics_out)
unlink(list_perf$fname_y_validation_out)
}
Expand Down Expand Up @@ -960,7 +966,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
"Error in cross_validation::fn_cross_validation_across_populations_bulk(...). ",
"Input data (list_merged) is an error type."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the output directory
if (!is.null(dir_output)) {
Expand All @@ -980,7 +986,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
"Unable to create the output directory: ", dir_output, ". ",
"Please check your permissions to write into that directory."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Check if we have more than 1 population
vec_populations = sort(unique(list_merged$list_pheno$pop))
Expand All @@ -992,7 +998,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
"Cannot perform bulked across populations cross-validation ",
"because there is only 1 population in the data set."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
list_cv_params = fn_cross_validation_preparation(
Expand All @@ -1010,7 +1016,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
"Error in cross_validation::fn_cross_validation_within_population(...). ",
"Failed to define the cross-validation parameters."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (list_cv_params$list_mem$n_threads <= 1) {
if (verbose) {
Expand All @@ -1034,7 +1040,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
mc.cores=min(c(n_threads, list_cv_params$n_threads)))
for (idx in 1:length(list_list_perf)) {
if (methods::is(list_list_perf[[idx]], "gpError")) {
cat(list_list_perf[[idx]]@message)
cat(paste0(list_list_perf[[idx]]@message, "\n"))
list_list_perf[[idx]] = NA
}
}
Expand Down Expand Up @@ -1089,6 +1095,12 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
file=fname_across_bulk_Rds)
### Clean-up temporary files generated by fn_cv_1 in parallel
for (list_perf in list_list_perf) {
if (is.na(utils::head(list_perf[[1]], n=1)[1])) {
next
}
if (is.na(utils::head(list_perf$df_metrics, n=1)[1]) | is.na(utils::head(list_perf$df_y_validation, n=1)[1])) {
next
}
unlink(list_perf$fname_metrics_out)
unlink(list_perf$fname_y_validation_out)
}
Expand Down Expand Up @@ -1216,7 +1228,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
"Error in cross_validation::fn_cross_validation_across_populations_pairwise(...). ",
"Input data (list_merged) is an error type."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the output directory
if (!is.null(dir_output)) {
Expand All @@ -1236,7 +1248,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
"Unable to create the output directory: ", dir_output, ". ",
"Please check your permissions to write into that directory."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Determine the number of populations
vec_populations = sort(unique(list_merged$list_pheno$pop))
Expand All @@ -1248,7 +1260,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
"Cannot perform pairwise-population cross-validation ",
"because there is only 1 population in the data set."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Instantiate the vector of Rds filenames containing the temporary output data per population
vec_fname_across_pairwise_Rds = c()
Expand All @@ -1271,7 +1283,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
"Error in cross_validation::fn_cross_validation_across_populations_pairwise(...). ",
"Failed to subset the data set."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
list_cv_params = fn_cross_validation_preparation(
Expand All @@ -1289,7 +1301,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
"Error in cross_validation::fn_cross_validation_within_population(...). ",
"Failed to define the cross-validation parameters."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (list_cv_params$list_mem$n_threads <= 1) {
if (verbose) {
Expand All @@ -1313,7 +1325,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
mc.cores=min(c(n_threads, list_cv_params$n_threads)))
for (idx in 1:length(list_list_perf)) {
if (methods::is(list_list_perf[[idx]], "gpError")) {
cat(list_list_perf[[idx]]@message)
cat(paste0(list_list_perf[[idx]]@message, "\n"))
list_list_perf[[idx]] = NA
}
}
Expand Down Expand Up @@ -1368,6 +1380,12 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
vec_fname_across_pairwise_Rds = c(vec_fname_across_pairwise_Rds, fname_across_pairwise_Rds)
### Clean-up temporary files generated by fn_cv_1 in parallel
for (list_perf in list_list_perf) {
if (is.na(utils::head(list_perf[[1]], n=1)[1])) {
next
}
if (is.na(utils::head(list_perf$df_metrics, n=1)[1]) | is.na(utils::head(list_perf$df_y_validation, n=1)[1])) {
next
}
unlink(list_perf$fname_metrics_out)
unlink(list_perf$fname_y_validation_out)
}
Expand Down Expand Up @@ -1526,7 +1544,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
"Error in cross_validation::fn_cross_validation_across_populations_lopo(...). ",
"Input data (list_merged) is an error type."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the output directory
if (!is.null(dir_output)) {
Expand All @@ -1546,7 +1564,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
"Unable to create the output directory: ", dir_output, ". ",
"Please check your permissions to write into that directory."
))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
list_cv_params = fn_cross_validation_preparation(
Expand All @@ -1564,7 +1582,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
"Error in cross_validation::fn_cross_validation_across_populations_lopo(...). ",
"Failed to instantiate the cross-validation parameters."
)))
cat(error@message); return(error)
cat(paste0(error@message, "\n")); return(error)
}
if (list_cv_params$list_mem$n_threads <= 1) {
if (verbose) {
Expand All @@ -1588,7 +1606,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
mc.cores=min(c(n_threads, list_cv_params$n_threads)))
for (idx in 1:length(list_list_perf)) {
if (methods::is(list_list_perf[[idx]], "gpError")) {
cat(list_list_perf[[idx]]@message)
cat(paste0(list_list_perf[[idx]]@message, "\n"))
list_list_perf[[idx]] = NA
}
}
Expand Down Expand Up @@ -1641,6 +1659,12 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
file=fname_across_lopo_Rds)
### Clean-up temporary files generated by fn_cv_1 in parallel
for (list_perf in list_list_perf) {
if (is.na(utils::head(list_perf[[1]], n=1)[1])) {
next
}
if (is.na(utils::head(list_perf$df_metrics, n=1)[1]) | is.na(utils::head(list_perf$df_y_validation, n=1)[1])) {
next
}
unlink(list_perf$fname_metrics_out)
unlink(list_perf$fname_y_validation_out)
}
Expand Down

0 comments on commit 5b23377

Please sign in to comment.