Excluding missing phenotypes in the cross-validation and only adding them for genomic prediction per se, i.e. in the last step
jeffersonfparil · Dec 13, 2024 · 1d2aee6 · 1d2aee6
1 parent 5f38085
commit 1d2aee6
Show file tree

Hide file tree

Showing 3 changed files with 130 additions and 130 deletions.
diff --git a/R/cross_validation.R b/R/cross_validation.R
@@ -135,7 +135,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                     "Error in cross_validation::fn_cv_1(...). ",
                     "Input data (list_merged) is an error type."
                 )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if ((i < 1) | (i > nrow(df_params))) {
         error = methods::new("gpError",
@@ -144,7 +144,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 "Error in cross_validation::fn_cv_1(...). ",
                 "The index (i) of df_params is beyond the number of rows in df_params (may also be less than 1)."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (sum((colnames(df_params) == c("rep", "fold", "model"))) != 3) {
         error = methods::new("gpError",
@@ -154,7 +154,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 "The data frame of parameters is incorrect. We are expecting the following columns in order: 'rep', 'fold', and 'model'.",
                 "The supplied data frame has the following columns or fields: ", paste(colnames(df_params), collapse=", ")
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (nrow(mat_idx_shuffle) != nrow(list_merged$G)) {
         error = methods::new("gpError",
@@ -165,7 +165,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 "does not match the number of samples in the input genotype and phenotype (and covariate) data (",
                 nrow(list_merged$G) , " rows)."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (ncol(mat_idx_shuffle) != max(df_params$rep)) {
         error = methods::new("gpError",
@@ -175,7 +175,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 "The number of columns in the shuffling matrix (mat_idx_shuffle; ", ncol(mat_idx_shuffle), " columns) ",
                 "does not match the replications requested (", max(df_params$rep) , " replications)."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (length(vec_set_partition_groupings) != nrow(list_merged$G)) {
         error = methods::new("gpError",
@@ -186,7 +186,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 length(vec_set_partition_groupings), " elements) does not match the number of samples in ",
                 "the input genotype and phenotype (and covariate) data (", nrow(list_merged$G) , " rows)."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (sum(range(vec_set_partition_groupings) == range(df_params$fold)) != 2) {
         error = methods::new("gpError",
@@ -198,7 +198,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 "does not match the number of folds requested (fold ", min(df_params$fold), " to fold ",
                 max(df_params$fold), ")."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define prefix of intermediate output files
     if ((prefix_tmp == "") | is.na(prefix_tmp) | is.null(prefix_tmp)) {
@@ -233,7 +233,7 @@ fn_cv_1 = function(i, list_merged, df_params, mat_idx_shuffle, vec_set_partition
                 "Error in cross_validation::fn_cv_1(...). ",
                 "Unable to fit the model, ", model, " and/or assess genomic prediction accuracy."
             )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### One-liner data frame of the prediction performance metrics
     df_metrics = data.frame(
@@ -380,7 +380,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
                     "Error in cross_validation::fn_cross_validation_preparation(...). ",
                     "Input data (list_merged) is an error type."
                 )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (cv_type == 1) {
         ###############################
@@ -400,7 +400,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
                     "Error in cross_validation::fn_cross_validation_preparation(...). ",
                     "The size of the data set is too small, n= ", n, "."
                 ))
-            cat(error@message; return(error)
+            cat(error@message); return(error)
         }
         vec_set_partition_groupings = rep(1:n_folds, each=set_size)
         if (length(vec_set_partition_groupings) < n) {
@@ -441,7 +441,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
                     "Cannot perform pairwise-population cross-validation (cv_type=2) ",
                     "because the number of populations (", n_folds, " populations) in the data set is not equal to 2."
                 ))
-            cat(error@message; return(error)
+            cat(error@message); return(error)
         }
         ### No shuffling needed as cross-validation is not replicated
         mat_idx_shuffle = matrix(1:n, ncol=1)
@@ -471,7 +471,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
                     "Cannot perform leave-one-population-out cross-validation (cv_type=3) ",
                     "because there is only one population in the data set."
                 ))
-            cat(error@message; return(error)
+            cat(error@message); return(error)
         }
         ### No shuffling needed as cross-validation is not replicated
         mat_idx_shuffle = matrix(1:n, ncol=1)
@@ -496,7 +496,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
                 "  --> '2' for pairwise-population cross-validation, e.g. training on population A and validation on population B. ",
                 "  --> '3' for leave-one-population-out cross-validation, e.g. training on populations 1 to 9 and validation on population 10."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Memory allocation error handling
     if (methods::is(list_mem, "gpError")) {
@@ -507,7 +507,7 @@ fn_cross_validation_preparation = function(list_merged, cv_type=1, n_folds=10, n
                 "Failed to estimate memory allocation requirements for parallel computations ",
                 "and the maximum number of threads which can be used to avoid out-of-memory (OOM) error."
             )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Print the full list of cross-validation sets, replications and models combinations
     if (verbose) {
@@ -645,7 +645,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                     "Error in cross_validation::fn_cross_validation_within_population(...). ",
                     "Input data (list_merged) is an error type."
                 )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define the output directory
     if (!is.null(dir_output)) {
@@ -665,7 +665,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                 "Unable to create the output directory: ", dir_output, ". ",
                 "Please check your permissions to write into that directory."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Determine the number of populations
     vec_populations = sort(unique(list_merged$list_pheno$pop))
@@ -683,7 +683,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                     "Error in cross_validation::fn_cross_validation_within_population(...). ",
                     "Failed to subset the data set."
                 )))
-            cat(error@message; return(error)
+            cat(error@message); return(error)
         }
         ### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
         list_cv_params = fn_cross_validation_preparation(
@@ -701,7 +701,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                     "Error in cross_validation::fn_cross_validation_within_population(...). ",
                     "Failed to define the cross-validation parameters."
                 )))
-            cat(error@message; return(error)
+            cat(error@message); return(error)
         }
         if (list_cv_params$list_mem$n_threads <= 1) {
             if (verbose) {
@@ -733,7 +733,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                             "Please check re-run cross_validation::fn_cross_validation_within_population(...) with ",
                             "bool_parallel=FALSE to identify the error."
                         )))
-                    cat(error@message; return(error)
+                    cat(error@message); return(error)
                 }
             }
         } else {
@@ -760,7 +760,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                             "fold: ", list_cv_params$df_params$fold[i], ", and ",
                             "model: ", list_cv_params$df_params$model[i], "."
                         )))
-                    cat(error@message; return(error)
+                    cat(error@message); return(error)
                 }
                 eval(parse(text=paste0("list_list_perf$`", i, "` = list_perf")))
             }
@@ -780,7 +780,7 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
                         "fold: ", list_cv_params$df_params$fold[i], ", and ",
                         "model: ", list_cv_params$df_params$model[i], "."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
             if (is.null(df_metrics) & is.null(df_y_validation)) {
                 df_metrics = list_perf$df_metrics
@@ -968,7 +968,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
                     "Error in cross_validation::fn_cross_validation_across_populations_bulk(...). ",
                     "Input data (list_merged) is an error type."
                 )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define the output directory
     if (!is.null(dir_output)) {
@@ -988,7 +988,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
                 "Unable to create the output directory: ", dir_output, ". ",
                 "Please check your permissions to write into that directory."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Check if we have more than 1 population
     vec_populations = sort(unique(list_merged$list_pheno$pop))
@@ -1000,7 +1000,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
                 "Cannot perform bulked across populations cross-validation ",
                 "because there is only 1 population in the data set."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
     list_cv_params = fn_cross_validation_preparation(
@@ -1018,7 +1018,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
                 "Error in cross_validation::fn_cross_validation_within_population(...). ",
                 "Failed to define the cross-validation parameters."
             )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (list_cv_params$list_mem$n_threads <= 1) {
         if (verbose) {
@@ -1051,7 +1051,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
                         "Please check re-run cross_validation::fn_cross_validation_across_populations_bulk(...) with ",
                         "bool_parallel=FALSE to identify the error."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
         }
     } else {
@@ -1079,7 +1079,7 @@ fn_cross_validation_across_populations_bulk = function(list_merged, n_folds=10,
                         "fold: ", list_cv_params$df_params$fold[i], ", and ",
                         "model: ", list_cv_params$df_params$model[i], "."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
             eval(parse(text=paste0("list_list_perf$`", i, "` = list_perf")))
         }
@@ -1233,7 +1233,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                     "Error in cross_validation::fn_cross_validation_across_populations_pairwise(...). ",
                     "Input data (list_merged) is an error type."
                 )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define the output directory
     if (!is.null(dir_output)) {
@@ -1253,7 +1253,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                 "Unable to create the output directory: ", dir_output, ". ",
                 "Please check your permissions to write into that directory."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Determine the number of populations
     vec_populations = sort(unique(list_merged$list_pheno$pop))
@@ -1265,7 +1265,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                 "Cannot perform pairwise-population cross-validation ",
                 "because there is only 1 population in the data set."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Instantiate the vector of Rds filenames containing the temporary output data per population
     vec_fname_across_pairwise_Rds = c()
@@ -1288,7 +1288,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                         "Error in cross_validation::fn_cross_validation_across_populations_pairwise(...). ",
                         "Failed to subset the data set."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
             ### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
             list_cv_params = fn_cross_validation_preparation(
@@ -1306,7 +1306,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                         "Error in cross_validation::fn_cross_validation_within_population(...). ",
                         "Failed to define the cross-validation parameters."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
             if (list_cv_params$list_mem$n_threads <= 1) {
                 if (verbose) {
@@ -1338,7 +1338,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                                 "Please check re-run cross_validation::fn_cross_validation_across_populations_pairwise(...) with ",
                                 "bool_parallel=FALSE to identify the error."
                             )))
-                        cat(error@message; return(error)
+                        cat(error@message); return(error)
                     }
                 }
             } else {
@@ -1366,7 +1366,7 @@ fn_cross_validation_across_populations_pairwise = function(list_merged,
                                 "fold: ", list_cv_params$df_params$fold[i], ", and ",
                                 "model: ", list_cv_params$df_params$model[i], "."
                             )))
-                        cat(error@message; return(error)
+                        cat(error@message); return(error)
                     }
                     eval(parse(text=paste0("list_list_perf$`", i, "` = list_perf")))
                 }
@@ -1550,7 +1550,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
                     "Error in cross_validation::fn_cross_validation_across_populations_lopo(...). ",
                     "Input data (list_merged) is an error type."
                 )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define the output directory
     if (!is.null(dir_output)) {
@@ -1570,7 +1570,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
                 "Unable to create the output directory: ", dir_output, ". ",
                 "Please check your permissions to write into that directory."
             ))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     ### Define the cross-validation parameters as well as the maximum number of threads we can safely use in parallel
     list_cv_params = fn_cross_validation_preparation(
@@ -1588,7 +1588,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
                 "Error in cross_validation::fn_cross_validation_across_populations_lopo(...). ",
                 "Failed to instantiate the cross-validation parameters."
             )))
-        cat(error@message; return(error)
+        cat(error@message); return(error)
     }
     if (list_cv_params$list_mem$n_threads <= 1) {
         if (verbose) {
@@ -1620,7 +1620,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
                         "Please check re-run cross_validation::fn_cross_validation_across_populations_lopo(...) with ",
                         "bool_parallel=FALSE to identify the error."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
         }
     } else {
@@ -1647,7 +1647,7 @@ fn_cross_validation_across_populations_lopo = function(list_merged,
                         "fold: ", list_cv_params$df_params$fold[i], ", and ",
                         "model: ", list_cv_params$df_params$model[i], "."
                     )))
-                cat(error@message; return(error)
+                cat(error@message); return(error)
             }
             eval(parse(text=paste0("list_list_perf$`", i, "` = list_perf")))
         }