Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modularize Post Processing #55

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions nssp_demo/all_post_process.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

# Check if the base directory is provided as an argument
if [ -z "$1" ]; then
echo "Usage: $0 <base_dir>"
exit 1
fi

# Base directory containing subdirectories
BASE_DIR="$1"


# Iterate over each subdirectory in the base directory
for SUBDIR in "$BASE_DIR"/*/; do
# Run the R script with the current subdirectory as the model_dir argument
echo "$SUBDIR"
# will work once https://github.com/rstudio/renv/pull/2018 is merged
Rscript -e "renv::run(\"post_process.R\", project = \"..\", args = c(\"--model_dir ${SUBDIR}\"))"
done


# # Get the name of the current directory (base_dir)
base_dir_name=$(basename "$(pwd)")

# Find all forecast_plot.pdf files and combine them using pdfunite
find . -name "forecast_plot.pdf" | sort | xargs pdfunite - "${BASE_DIR}/${base_dir_name}_all_forecasts.pdf"

echo "Combined PDF created: ${base_dir_name}_all_forecasts.pdf"
138 changes: 85 additions & 53 deletions nssp_demo/post_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,69 @@ library(cowplot)
library(glue)
library(scales)
library(here)
library(argparser)

# Create a parser
p <- arg_parser("Generate forecast figures")

base_dir <- path_dir(model_dir)

theme_set(theme_minimal_grid())

disease_name_formatter <- c("covid-19" = "COVID-19", "influenza" = "Flu")

make_forecast_fig <- function(model_dir) {
read_pyrenew_samples <- function(inference_data_path,
filter_bad_chains = TRUE,
good_chain_tol = 2) {
arviz_split <- function(x) {
x %>%
select(-distribution) %>%
split(f = as.factor(x$distribution))
}

pyrenew_samples <-
read_csv(inference_data_path) %>%
rename_with(\(varname) str_remove_all(varname, "\\(|\\)|\\'|(, \\d+)")) |>
rename(
.chain = chain,
.iteration = draw
) |>
mutate(across(c(.chain, .iteration), \(x) as.integer(x + 1))) |>
mutate(
.draw = tidybayes:::draw_from_chain_and_iteration_(.chain, .iteration),
.after = .iteration
) |>
pivot_longer(-starts_with("."),
names_sep = ", ",
names_to = c("distribution", "name")
) |>
arviz_split() |>
map(\(x) pivot_wider(x, names_from = name) |> tidy_draws())

if (filter_bad_chains) {
good_chains <-
pyrenew_samples$log_likelihood %>%
pivot_longer(-starts_with(".")) %>%
group_by(.iteration, .chain) %>%
summarize(value = sum(value)) %>%
group_by(.chain) %>%
summarize(value = mean(value)) %>%
filter(value >= max(value) - 2) %>%
pull(.chain)
} else {
good_chains <- unique(pyrenew_samples$log_likelihood$.chain)
}

good_pyrenew_samples <- map(
pyrenew_samples,
\(x) filter(x, .chain %in% good_chains)
)
good_pyrenew_samples
}

make_forecast_fig <- function(model_dir,
filter_bad_chains = TRUE,
good_chain_tol = 2) {
disease_name_raw <- base_dir %>%
path_file() %>%
str_extract("^.+(?=_r_)")
Expand All @@ -20,17 +77,15 @@ make_forecast_fig <- function(model_dir) {
pluck(1) %>%
tail(1)


data_path <- path(model_dir, "data", ext = "csv")
inference_data_path <- path(model_dir, "inference_data",
ext = "csv"
)


dat <- read_csv(data_path) %>%
arrange(date) %>%
mutate(time = row_number() - 1) %>%
rename(.value = COVID_ED_admissions)
rename(.value = ED_admissions)

last_training_date <- dat %>%
filter(data_type == "train") %>%
Expand All @@ -41,31 +96,11 @@ make_forecast_fig <- function(model_dir) {
pull(date) %>%
max()

arviz_split <- function(x) {
x %>%
select(-distribution) %>%
split(f = as.factor(x$distribution))
}

pyrenew_samples <-
read_csv(inference_data_path) %>%
rename_with(\(varname) str_remove_all(varname, "\\(|\\)|\\'|(, \\d+)")) |>
rename(
.chain = chain,
.iteration = draw
) |>
mutate(across(c(.chain, .iteration), \(x) as.integer(x + 1))) |>
mutate(
.draw = tidybayes:::draw_from_chain_and_iteration_(.chain, .iteration),
.after = .iteration
) |>
pivot_longer(-starts_with("."),
names_sep = ", ",
names_to = c("distribution", "name")
) |>
arviz_split() |>
map(\(x) pivot_wider(x, names_from = name) |> tidy_draws())

pyrenew_samples <- read_pyrenew_samples(inference_data_path,
filter_bad_chains = filter_bad_chains,
good_chain_tol = good_chain_tol
)

hosp_ci <-
pyrenew_samples$posterior_predictive %>%
Expand Down Expand Up @@ -118,35 +153,34 @@ make_forecast_fig <- function(model_dir) {
forecast_plot
}

forecast_fig <- make_forecast_fig(model_dir)

base_dir <- path(here(
save_plot(
filename = path(model_dir, "forecast_plot", ext = "pdf"),
plot = forecast_fig,
device = cairo_pdf, base_height = 6
)


# Temp code while command line version doesn't work
base_dir <- path(
"nssp_demo",
"private_data",
"covid-19_r_2024-10-10_f_2024-04-12_l_2024-10-09_t_2024-10-05"
))
"influenza_r_2024-10-10_f_2024-04-12_l_2024-10-09_t_2024-10-05"
)

walk(dir_ls(base_dir), function(model_dir) {
forecast_fig <- make_forecast_fig(model_dir)

forecast_fig_tbl <-
tibble(base_model_dir = dir_ls(base_dir)) %>%
filter(
path(base_model_dir, "inference_data", ext = "csv") %>%
file_exists()
) %>%
mutate(forecast_fig = map(base_model_dir, make_forecast_fig)) %>%
mutate(figure_path = path(base_model_dir, "forecast_plot", ext = "pdf"))

pwalk(
forecast_fig_tbl %>% select(forecast_fig, figure_path),
function(forecast_fig, figure_path) {
save_plot(
filename = figure_path,
plot = forecast_fig,
device = cairo_pdf, base_height = 6
)
}
)
save_plot(
filename = path(model_dir, "forecast_plot", ext = "pdf"),
plot = forecast_fig,
device = cairo_pdf, base_height = 6
)
})

str_c(forecast_fig_tbl$figure_path, collapse = " ") %>%
path(dir_ls(base_dir, type = "directory"), "forecast_plot", ext = "pdf") %>%
str_c(collapse = " ") %>%
str_c(
path(base_dir,
glue("{path_file(base_dir)}_all_forecasts"),
Expand All @@ -155,5 +189,3 @@ str_c(forecast_fig_tbl$figure_path, collapse = " ") %>%
sep = " "
) %>%
system2("pdfunite", args = .)

setdiff(usa::state.abb, path_file(forecast_fig_tbl$base_model_dir))
59 changes: 56 additions & 3 deletions renv.lock
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@
"Version": "0.0.0.9000",
"Source": "GitHub",
"RemoteType": "github",
"RemoteHost": "api.github.com",
"RemoteUsername": "CDCgov",
"RemoteRepo": "cfa-epinow2-pipeline",
"RemoteRef": "main",
"RemoteSha": "c04c8f857e49530ffad946ea582f404a42098735",
"RemoteSha": "5e79a4e28ac8ecd2d485e2868fa7b4bf0a1b999d",
"RemoteHost": "api.github.com",
"Remotes": "github::epiforecasts/EpiNow2@bcf297cf36a93cc56123bc3c9e8cebfb1421a962",
"Requirements": [
"AzureRMR",
Expand All @@ -107,7 +107,7 @@
"duckdb",
"rlang"
],
"Hash": "8e012d5b5d7114f9ff707973a74afd79"
"Hash": "51dc64d064becda8baf7254f809d3844"
},
"DBI": {
"Package": "DBI",
Expand Down Expand Up @@ -328,6 +328,16 @@
],
"Hash": "4f57884290cc75ab22f4af9e9d4ca862"
},
"argparser": {
"Package": "argparser",
"Version": "0.7.2",
"Source": "Repository",
"Repository": "CRAN",
"Requirements": [
"methods"
],
"Hash": "6ae51dd1cd6a2f5784273ab70d41c66c"
},
"arrayhelpers": {
"Package": "arrayhelpers",
"Version": "1.1-0",
Expand All @@ -340,6 +350,28 @@
],
"Hash": "3d4e52d458784c335af3846f2de64f75"
},
"arrow": {
"Package": "arrow",
"Version": "17.0.0.1",
"Source": "Repository",
"Repository": "CRAN",
"Requirements": [
"R",
"R6",
"assertthat",
"bit64",
"cpp11",
"glue",
"methods",
"purrr",
"rlang",
"stats",
"tidyselect",
"utils",
"vctrs"
],
"Hash": "14af96cb2973f6a6c220ce9c3e5b02cd"
},
"askpass": {
"Package": "askpass",
"Version": "1.2.0",
Expand All @@ -350,6 +382,16 @@
],
"Hash": "cad6cf7f1d5f6e906700b9d3e718c796"
},
"assertthat": {
"Package": "assertthat",
"Version": "0.2.1",
"Source": "Repository",
"Repository": "CRAN",
"Requirements": [
"tools"
],
"Hash": "50c838a310445e954bc13f26f26a6ecf"
},
"backports": {
"Package": "backports",
"Version": "1.5.0",
Expand Down Expand Up @@ -2190,6 +2232,17 @@
],
"Hash": "f561504ec2897f4d46f0c7657e488ae1"
},
"usa": {
"Package": "usa",
"Version": "0.1.2",
"Source": "Repository",
"Repository": "CRAN",
"Requirements": [
"R",
"tibble"
],
"Hash": "85af363a00cbf994a7b5153e053e0acd"
},
"utf8": {
"Package": "utf8",
"Version": "1.2.4",
Expand Down
Loading