CDCgov · damonbayer · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024 · Oct 15, 2024
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Check if the base directory is provided as an argument
+if [ -z "$1" ]; then
+    echo "Usage: $0 <base_dir>"
+    exit 1
+fi
+
+# Base directory containing subdirectories
+BASE_DIR="$1"
+
+
+# Iterate over each subdirectory in the base directory
+for SUBDIR in "$BASE_DIR"/*/; do
+    # Run the R script with the current subdirectory as the model_dir argument
+    echo "$SUBDIR"
+    # will work once  https://github.com/rstudio/renv/pull/2018 is merged
+    Rscript -e "renv::run(\"post_process.R\", project = \"..\", args = c(\"--model_dir ${SUBDIR}\"))"
+done
+
+
+# # Get the name of the current directory (base_dir)
+base_dir_name=$(basename "$(pwd)")
+
+# Find all forecast_plot.pdf files and combine them using pdfunite
+find . -name "forecast_plot.pdf" | sort | xargs pdfunite - "${BASE_DIR}/${base_dir_name}_all_forecasts.pdf"
+
+echo "Combined PDF created: ${base_dir_name}_all_forecasts.pdf"
@@ -5,12 +5,69 @@ library(cowplot)
 library(glue)
 library(scales)
 library(here)
+library(argparser)
+
+# Create a parser
+p <- arg_parser("Generate forecast figures")
+
+base_dir <- path_dir(model_dir)
 
 theme_set(theme_minimal_grid())
 
 disease_name_formatter <- c("covid-19" = "COVID-19", "influenza" = "Flu")
 
-make_forecast_fig <- function(model_dir) {
+read_pyrenew_samples <- function(inference_data_path,
+                                 filter_bad_chains = TRUE,
+                                 good_chain_tol = 2) {
+  arviz_split <- function(x) {
+    x %>%
+      select(-distribution) %>%
+      split(f = as.factor(x$distribution))
+  }
+
+  pyrenew_samples <-
+    read_csv(inference_data_path) %>%
+    rename_with(\(varname) str_remove_all(varname, "\\(|\\)|\\'|(, \\d+)")) |>
+    rename(
+      .chain = chain,
+      .iteration = draw
+    ) |>
+    mutate(across(c(.chain, .iteration), \(x) as.integer(x + 1))) |>
+    mutate(
+      .draw = tidybayes:::draw_from_chain_and_iteration_(.chain, .iteration),
+      .after = .iteration
+    ) |>
+    pivot_longer(-starts_with("."),
+      names_sep = ", ",
+      names_to = c("distribution", "name")
+    ) |>
+    arviz_split() |>
+    map(\(x) pivot_wider(x, names_from = name) |> tidy_draws())
+
+  if (filter_bad_chains) {
+    good_chains <-
+      pyrenew_samples$log_likelihood %>%
+      pivot_longer(-starts_with(".")) %>%
+      group_by(.iteration, .chain) %>%
+      summarize(value = sum(value)) %>%
+      group_by(.chain) %>%
+      summarize(value = mean(value)) %>%
+      filter(value >= max(value) - 2) %>%
+      pull(.chain)
+  } else {
+    good_chains <- unique(pyrenew_samples$log_likelihood$.chain)
+  }
+
+  good_pyrenew_samples <- map(
+    pyrenew_samples,
+    \(x) filter(x, .chain %in% good_chains)
+  )
+  good_pyrenew_samples
+}
+
+make_forecast_fig <- function(model_dir,
+                              filter_bad_chains = TRUE,
+                              good_chain_tol = 2) {
   disease_name_raw <- base_dir %>%
     path_file() %>%
     str_extract("^.+(?=_r_)")
@@ -20,17 +77,15 @@ make_forecast_fig <- function(model_dir) {
     pluck(1) %>%
     tail(1)
 
-
   data_path <- path(model_dir, "data", ext = "csv")
   inference_data_path <- path(model_dir, "inference_data",
     ext = "csv"
   )
 
-
   dat <- read_csv(data_path) %>%
     arrange(date) %>%
     mutate(time = row_number() - 1) %>%
-    rename(.value = COVID_ED_admissions)
+    rename(.value = ED_admissions)
 
   last_training_date <- dat %>%
     filter(data_type == "train") %>%
@@ -41,31 +96,11 @@ make_forecast_fig <- function(model_dir) {
     pull(date) %>%
     max()
 
-  arviz_split <- function(x) {
-    x %>%
-      select(-distribution) %>%
-      split(f = as.factor(x$distribution))
-  }
-
-  pyrenew_samples <-
-    read_csv(inference_data_path) %>%
-    rename_with(\(varname) str_remove_all(varname, "\\(|\\)|\\'|(, \\d+)")) |>
-    rename(
-      .chain = chain,
-      .iteration = draw
-    ) |>
-    mutate(across(c(.chain, .iteration), \(x) as.integer(x + 1))) |>
-    mutate(
-      .draw = tidybayes:::draw_from_chain_and_iteration_(.chain, .iteration),
-      .after = .iteration
-    ) |>
-    pivot_longer(-starts_with("."),
-      names_sep = ", ",
-      names_to = c("distribution", "name")
-    ) |>
-    arviz_split() |>
-    map(\(x) pivot_wider(x, names_from = name) |> tidy_draws())
 
+  pyrenew_samples <- read_pyrenew_samples(inference_data_path,
+    filter_bad_chains = filter_bad_chains,
+    good_chain_tol = good_chain_tol
+  )
 
   hosp_ci <-
     pyrenew_samples$posterior_predictive %>%
@@ -118,35 +153,34 @@ make_forecast_fig <- function(model_dir) {
   forecast_plot
 }
 
+forecast_fig <- make_forecast_fig(model_dir)
 
-base_dir <- path(here(
+save_plot(
+  filename = path(model_dir, "forecast_plot", ext = "pdf"),
+  plot = forecast_fig,
+  device = cairo_pdf, base_height = 6
+)
+
+
+# Temp code while command line version doesn't work
+base_dir <- path(
   "nssp_demo",
   "private_data",
-  "covid-19_r_2024-10-10_f_2024-04-12_l_2024-10-09_t_2024-10-05"
-))
+  "influenza_r_2024-10-10_f_2024-04-12_l_2024-10-09_t_2024-10-05"
+)
 
+walk(dir_ls(base_dir), function(model_dir) {
+  forecast_fig <- make_forecast_fig(model_dir)
 
-forecast_fig_tbl <-
-  tibble(base_model_dir = dir_ls(base_dir)) %>%
-  filter(
-    path(base_model_dir, "inference_data", ext = "csv") %>%
-      file_exists()
-  ) %>%
-  mutate(forecast_fig = map(base_model_dir, make_forecast_fig)) %>%
-  mutate(figure_path = path(base_model_dir, "forecast_plot", ext = "pdf"))
-
-pwalk(
-  forecast_fig_tbl %>% select(forecast_fig, figure_path),
-  function(forecast_fig, figure_path) {
-    save_plot(
-      filename = figure_path,
-      plot = forecast_fig,
-      device = cairo_pdf, base_height = 6
-    )
-  }
-)
+  save_plot(
+    filename = path(model_dir, "forecast_plot", ext = "pdf"),
+    plot = forecast_fig,
+    device = cairo_pdf, base_height = 6
+  )
+})
 
-str_c(forecast_fig_tbl$figure_path, collapse = " ") %>%
+path(dir_ls(base_dir, type = "directory"), "forecast_plot", ext = "pdf") %>%
+  str_c(collapse = " ") %>%
   str_c(
     path(base_dir,
       glue("{path_file(base_dir)}_all_forecasts"),
@@ -155,5 +189,3 @@ str_c(forecast_fig_tbl$figure_path, collapse = " ") %>%
     sep = " "
   ) %>%
   system2("pdfunite", args = .)
-
-setdiff(usa::state.abb, path_file(forecast_fig_tbl$base_model_dir))
@@ -91,11 +91,11 @@
       "Version": "0.0.0.9000",
       "Source": "GitHub",
       "RemoteType": "github",
-      "RemoteHost": "api.github.com",
       "RemoteUsername": "CDCgov",
       "RemoteRepo": "cfa-epinow2-pipeline",
       "RemoteRef": "main",
-      "RemoteSha": "c04c8f857e49530ffad946ea582f404a42098735",
+      "RemoteSha": "5e79a4e28ac8ecd2d485e2868fa7b4bf0a1b999d",
+      "RemoteHost": "api.github.com",
       "Remotes": "github::epiforecasts/EpiNow2@bcf297cf36a93cc56123bc3c9e8cebfb1421a962",
       "Requirements": [
         "AzureRMR",
@@ -107,7 +107,7 @@
         "duckdb",
         "rlang"
       ],
-      "Hash": "8e012d5b5d7114f9ff707973a74afd79"
+      "Hash": "51dc64d064becda8baf7254f809d3844"
     },
     "DBI": {
       "Package": "DBI",
@@ -328,6 +328,16 @@
       ],
       "Hash": "4f57884290cc75ab22f4af9e9d4ca862"
     },
+    "argparser": {
+      "Package": "argparser",
+      "Version": "0.7.2",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "methods"
+      ],
+      "Hash": "6ae51dd1cd6a2f5784273ab70d41c66c"
+    },
     "arrayhelpers": {
       "Package": "arrayhelpers",
       "Version": "1.1-0",
@@ -340,6 +350,28 @@
       ],
       "Hash": "3d4e52d458784c335af3846f2de64f75"
     },
+    "arrow": {
+      "Package": "arrow",
+      "Version": "17.0.0.1",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "R6",
+        "assertthat",
+        "bit64",
+        "cpp11",
+        "glue",
+        "methods",
+        "purrr",
+        "rlang",
+        "stats",
+        "tidyselect",
+        "utils",
+        "vctrs"
+      ],
+      "Hash": "14af96cb2973f6a6c220ce9c3e5b02cd"
+    },
     "askpass": {
       "Package": "askpass",
       "Version": "1.2.0",
@@ -350,6 +382,16 @@
       ],
       "Hash": "cad6cf7f1d5f6e906700b9d3e718c796"
     },
+    "assertthat": {
+      "Package": "assertthat",
+      "Version": "0.2.1",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "tools"
+      ],
+      "Hash": "50c838a310445e954bc13f26f26a6ecf"
+    },
     "backports": {
       "Package": "backports",
       "Version": "1.5.0",
@@ -2190,6 +2232,17 @@
       ],
       "Hash": "f561504ec2897f4d46f0c7657e488ae1"
     },
+    "usa": {
+      "Package": "usa",
+      "Version": "0.1.2",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "tibble"
+      ],
+      "Hash": "85af363a00cbf994a7b5153e053e0acd"
+    },
     "utf8": {
       "Package": "utf8",
       "Version": "1.2.4",