Skip to content

Commit

Permalink
condense data testing
Browse files Browse the repository at this point in the history
  • Loading branch information
mitokic committed Jul 25, 2024
1 parent edae42f commit e79a10d
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 23 deletions.
16 changes: 9 additions & 7 deletions R/final_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,15 @@ final_models <- function(run_info,

# clean up any parallel run process
par_end(cl)

# Condense outputs into fewer files for larger runs: each combo writes its
# own forecast file, so big runs produce many small files that are slow to
# read back; condense_data() merges them into batched files.
# NOTE(review): the trigger threshold appears to have been lowered from
# 1000 to 100 combos — confirm 100 is the intended cutoff and not a
# leftover testing value.
if(length(combo_list) > 100) {
cli::cli_progress_step("Condensing Forecasts")

condense_data(run_info,
parallel_processing,
num_cores)
}

# reconcile hierarchical forecasts
if (forecast_approach != "bottoms_up") {
Expand All @@ -633,13 +642,6 @@ final_models <- function(run_info,
)
}

# condense outputs into fewer files for larger runs
if(length(combo_list) > 1000) {
condense_data(run_info,
parallel_processing,
num_cores)
}

# calculate weighted mape
weighted_mape <- get_forecast_data(run_info = run_info) %>%
dplyr::filter(Run_Type == "Back_Test",
Expand Down
34 changes: 25 additions & 9 deletions R/hierarchy.R
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,20 @@ reconcile_hierarchical_data <- function(run_info,
hts_nodes <- hts_list$nodes
original_combo_list <- hts_list$original_combos
hts_combo_list <- hts_list$hts_combos

# Determine whether this run's forecast output was condensed into batched
# files (condense_data() writes "*condensed" files for larger runs).
cond_path <- paste0(
  run_info$path, "/forecasts/*", hash_data(run_info$experiment_name), "-",
  hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
)

condensed_files <- list_files(run_info$storage_object, fs::path(cond_path))

# TRUE when at least one condensed file exists for this run
condensed <- length(condensed_files) > 0

# get unreconciled forecast data
if (is.null(parallel_processing)) {
Expand All @@ -430,10 +444,17 @@ reconcile_hierarchical_data <- function(run_info,
return_type <- "df"
}

fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*models", ".", run_info$data_output
)
# Pick the forecast file glob: read the batched "*condensed" files when
# condense_data() has run, otherwise the per-model "*models" files. Both
# branches share the same path template, so only the tag differs.
fcst_tag <- if (condensed) "condensed" else "models"

fcst_path <- paste0(
  "/forecasts/*", hash_data(run_info$experiment_name), "-",
  hash_data(run_info$run_name), "*", fcst_tag, ".", run_info$data_output
)

unreconciled_tbl <- read_file(run_info,
path = fcst_path,
Expand Down Expand Up @@ -677,11 +698,6 @@ reconcile_hierarchical_data <- function(run_info,
) %>%
dplyr::select(Combo, Date, Target)

fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*models", ".", run_info$data_output
)

schema <- arrow::schema(
arrow::field("Combo_ID", arrow::string()),
arrow::field("Model_ID", arrow::string()),
Expand Down
13 changes: 6 additions & 7 deletions R/read_write_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,21 +86,20 @@ get_forecast_data <- function(run_info,
}

# get forecast data
if(condensed) {
print("reading condensed files")
if(forecast_approach != "bottoms_up") {
fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
hash_data(run_info$run_name), "*reconciled", ".", run_info$data_output
)
} else if (forecast_approach == "bottoms_up") {
} else if (condensed) {
fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*models", ".", run_info$data_output
hash_data(run_info$run_name), "*condensed", ".", run_info$data_output
)
} else {
fcst_path <- paste0(
"/forecasts/*", hash_data(run_info$experiment_name), "-",
hash_data(run_info$run_name), "*reconciled", ".", run_info$data_output
hash_data(run_info$run_name), "*models", ".", run_info$data_output
)
}

Expand Down Expand Up @@ -716,7 +715,7 @@ condense_data <- function(run_info,
list_of_batches <- list()

# Define the batch size
batch_size <- 10000
batch_size <- 100

# Calculate the number of batches needed
num_batches <- ceiling(length(initial_file_list) / batch_size)
Expand Down

0 comments on commit e79a10d

Please sign in to comment.