From a35c967004cccd947e7011fdf6f7fef674154c48 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 2 Aug 2024 12:47:47 -0400 Subject: [PATCH] support log_resources --- DESCRIPTION | 4 ++-- NEWS.md | 3 ++- R/crew_controller_lsf.R | 4 +++- R/crew_controller_pbs.R | 4 +++- R/crew_controller_sge.R | 4 +++- R/crew_controller_slurm.R | 4 +++- man/crew_controller_lsf.Rd | 32 ++++++++++++++++++++++++++++++++ man/crew_controller_pbs.Rd | 32 ++++++++++++++++++++++++++++++++ man/crew_controller_sge.Rd | 32 ++++++++++++++++++++++++++++++++ man/crew_controller_slurm.Rd | 32 ++++++++++++++++++++++++++++++++ 10 files changed, 144 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a3c63de..64eb617 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,7 @@ Description: In computationally demanding analysis projects, 'clustermq' by Schubert (2019) ), and 'batchtools' by Lang, Bischl, and Surmann (2017). . -Version: 0.3.2.9003 +Version: 0.3.2.9004 License: MIT + file LICENSE URL: https://wlandau.github.io/crew.cluster/, https://github.com/wlandau/crew.cluster @@ -49,7 +49,7 @@ Authors@R: c( Depends: R (>= 4.0.0) Imports: - crew (>= 0.9.5.9003), + crew (>= 0.9.5.9007), ps, lifecycle, R6, diff --git a/NEWS.md b/NEWS.md index 8e87a3d..0df2dca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,8 @@ -# crew.cluster 0.3.2.9003 (development) +# crew.cluster 0.3.2.9004 (development) * Add `slurm_memory_gigabytes_required` to set `--mem` in SLURM (#44, @multimeric). * Add `r_arguments` to accept R command line arguments for workers. +* Support `log_resources` in controllers. 
# crew.cluster 0.3.2 diff --git a/R/crew_controller_lsf.R b/R/crew_controller_lsf.R index e330fba..9606769 100644 --- a/R/crew_controller_lsf.R +++ b/R/crew_controller_lsf.R @@ -38,6 +38,7 @@ crew_controller_lsf <- function( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -77,7 +78,8 @@ crew_controller_lsf <- function( tls_config = tls_config, seconds_interval = seconds_interval, seconds_timeout = seconds_timeout, - retry_tasks = retry_tasks + retry_tasks = retry_tasks, + log_resources = log_resources ) launcher <- crew_launcher_lsf( name = name, diff --git a/R/crew_controller_pbs.R b/R/crew_controller_pbs.R index 11987dd..0f57939 100644 --- a/R/crew_controller_pbs.R +++ b/R/crew_controller_pbs.R @@ -34,6 +34,7 @@ crew_controller_pbs <- function( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -74,7 +75,8 @@ crew_controller_pbs <- function( tls_config = tls_config, seconds_interval = seconds_interval, seconds_timeout = seconds_timeout, - retry_tasks = retry_tasks + retry_tasks = retry_tasks, + log_resources = log_resources ) launcher <- crew_launcher_pbs( name = name, diff --git a/R/crew_controller_sge.R b/R/crew_controller_sge.R index 0578d4a..7f11950 100644 --- a/R/crew_controller_sge.R +++ b/R/crew_controller_sge.R @@ -34,6 +34,7 @@ crew_controller_sge <- function( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -76,7 +77,8 @@ crew_controller_sge <- function( tls_config = tls_config, seconds_interval = seconds_interval, seconds_timeout = seconds_timeout, - retry_tasks = retry_tasks + retry_tasks = retry_tasks, + log_resources = log_resources ) launcher <- crew_launcher_sge( name = name, diff --git a/R/crew_controller_slurm.R b/R/crew_controller_slurm.R index 
52b4ce2..d831acd 100644 --- a/R/crew_controller_slurm.R +++ b/R/crew_controller_slurm.R @@ -38,6 +38,7 @@ crew_controller_slurm <- function( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -78,7 +79,8 @@ crew_controller_slurm <- function( tls_config = tls_config, seconds_interval = seconds_interval, seconds_timeout = seconds_timeout, - retry_tasks = retry_tasks + retry_tasks = retry_tasks, + log_resources = log_resources ) launcher <- crew_launcher_slurm( name = name, diff --git a/man/crew_controller_lsf.Rd b/man/crew_controller_lsf.Rd index 75a377b..98c702b 100644 --- a/man/crew_controller_lsf.Rd +++ b/man/crew_controller_lsf.Rd @@ -20,6 +20,7 @@ crew_controller_lsf( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -101,6 +102,37 @@ return a \code{mirai} error code (code number 19). Use \code{FALSE} for debugging purposes, e.g. to confirm that a task is causing a worker to run out of memory or crash in some other way.} +\item{log_resources}{Optional character string with a file path to a +text file to log memory consumption. +Set \code{log_resources} to \code{NULL} to avoid writing to a log file. +If you supply a path, then +the \code{log()} method will write memory usage statistics to the file, +and most controller methods will do the same with throttling +so resource consumption is recorded throughout the whole life cycle +of the controller. + +The log file is in comma-separated values +(CSV) format which can be easily read by \code{readr::read_csv()}. +The controller automatically deletes the old log file when it starts +(when \code{controller$start()} is called for the first time, but not +subsequent times). + +The log file has one row per observation of a process, +including the current +R process ("client") and the \code{mirai} dispatcher. 
If the dispatcher +is not included in the output, it means the dispatcher process +is not running. +Columns include: +* \code{type}: the type of process (client or dispatcher) +* \code{pid}: the process ID. +* \code{status}: The process status (from \code{ps::ps_status()}). +* \code{rss}: resident set size (RSS). RSS is the total memory held by +a process, including shared libraries which may also be +in use by other processes. RSS is obtained +from \code{ps::ps_memory_info()} and shown in bytes. +* \code{elapsed}: number of elapsed seconds since the R process was +started (from \code{proc.time()["elapsed"]}).} + \item{tasks_max}{Maximum number of tasks that a worker will do before exiting. See the \code{maxtasks} argument of \code{mirai::daemon()}. \code{crew} does not diff --git a/man/crew_controller_pbs.Rd b/man/crew_controller_pbs.Rd index 70cbf3d..3cb52f3 100644 --- a/man/crew_controller_pbs.Rd +++ b/man/crew_controller_pbs.Rd @@ -20,6 +20,7 @@ crew_controller_pbs( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -102,6 +103,37 @@ return a \code{mirai} error code (code number 19). Use \code{FALSE} for debugging purposes, e.g. to confirm that a task is causing a worker to run out of memory or crash in some other way.} +\item{log_resources}{Optional character string with a file path to a +text file to log memory consumption. +Set \code{log_resources} to \code{NULL} to avoid writing to a log file. +If you supply a path, then +the \code{log()} method will write memory usage statistics to the file, +and most controller methods will do the same with throttling +so resource consumption is recorded throughout the whole life cycle +of the controller. + +The log file is in comma-separated values +(CSV) format which can be easily read by \code{readr::read_csv()}. 
+The controller automatically deletes the old log file when it starts +(when \code{controller$start()} is called for the first time, but not +subsequent times). + +The log file has one row per observation of a process, +including the current +R process ("client") and the \code{mirai} dispatcher. If the dispatcher +is not included in the output, it means the dispatcher process +is not running. +Columns include: +* \code{type}: the type of process (client or dispatcher) +* \code{pid}: the process ID. +* \code{status}: The process status (from \code{ps::ps_status()}). +* \code{rss}: resident set size (RSS). RSS is the total memory held by +a process, including shared libraries which may also be +in use by other processes. RSS is obtained +from \code{ps::ps_memory_info()} and shown in bytes. +* \code{elapsed}: number of elapsed seconds since the R process was +started (from \code{proc.time()["elapsed"]}).} + \item{tasks_max}{Maximum number of tasks that a worker will do before exiting. See the \code{maxtasks} argument of \code{mirai::daemon()}. \code{crew} does not diff --git a/man/crew_controller_sge.Rd b/man/crew_controller_sge.Rd index acb1204..ba25716 100644 --- a/man/crew_controller_sge.Rd +++ b/man/crew_controller_sge.Rd @@ -20,6 +20,7 @@ crew_controller_sge( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -104,6 +105,37 @@ return a \code{mirai} error code (code number 19). Use \code{FALSE} for debugging purposes, e.g. to confirm that a task is causing a worker to run out of memory or crash in some other way.} +\item{log_resources}{Optional character string with a file path to a +text file to log memory consumption. +Set \code{log_resources} to \code{NULL} to avoid writing to a log file. 
+If you supply a path, then +the \code{log()} method will write memory usage statistics to the file, +and most controller methods will do the same with throttling +so resource consumption is recorded throughout the whole life cycle +of the controller. + +The log file is in comma-separated values +(CSV) format which can be easily read by \code{readr::read_csv()}. +The controller automatically deletes the old log file when it starts +(when \code{controller$start()} is called for the first time, but not +subsequent times). + +The log file has one row per observation of a process, +including the current +R process ("client") and the \code{mirai} dispatcher. If the dispatcher +is not included in the output, it means the dispatcher process +is not running. +Columns include: +* \code{type}: the type of process (client or dispatcher) +* \code{pid}: the process ID. +* \code{status}: The process status (from \code{ps::ps_status()}). +* \code{rss}: resident set size (RSS). RSS is the total memory held by +a process, including shared libraries which may also be +in use by other processes. RSS is obtained +from \code{ps::ps_memory_info()} and shown in bytes. +* \code{elapsed}: number of elapsed seconds since the R process was +started (from \code{proc.time()["elapsed"]}).} + \item{tasks_max}{Maximum number of tasks that a worker will do before exiting. See the \code{maxtasks} argument of \code{mirai::daemon()}. \code{crew} does not diff --git a/man/crew_controller_slurm.Rd b/man/crew_controller_slurm.Rd index 71c79fc..ff98fbc 100644 --- a/man/crew_controller_slurm.Rd +++ b/man/crew_controller_slurm.Rd @@ -20,6 +20,7 @@ crew_controller_slurm( seconds_wall = Inf, seconds_exit = NULL, retry_tasks = TRUE, + log_resources = NULL, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -102,6 +103,37 @@ return a \code{mirai} error code (code number 19). Use \code{FALSE} for debugging purposes, e.g. 
to confirm that a task is causing a worker to run out of memory or crash in some other way.} +\item{log_resources}{Optional character string with a file path to a +text file to log memory consumption. +Set \code{log_resources} to \code{NULL} to avoid writing to a log file. +If you supply a path, then +the \code{log()} method will write memory usage statistics to the file, +and most controller methods will do the same with throttling +so resource consumption is recorded throughout the whole life cycle +of the controller. + +The log file is in comma-separated values +(CSV) format which can be easily read by \code{readr::read_csv()}. +The controller automatically deletes the old log file when it starts +(when \code{controller$start()} is called for the first time, but not +subsequent times). + +The log file has one row per observation of a process, +including the current +R process ("client") and the \code{mirai} dispatcher. If the dispatcher +is not included in the output, it means the dispatcher process +is not running. +Columns include: +* \code{type}: the type of process (client or dispatcher) +* \code{pid}: the process ID. +* \code{status}: The process status (from \code{ps::ps_status()}). +* \code{rss}: resident set size (RSS). RSS is the total memory held by +a process, including shared libraries which may also be +in use by other processes. RSS is obtained +from \code{ps::ps_memory_info()} and shown in bytes. +* \code{elapsed}: number of elapsed seconds since the R process was +started (from \code{proc.time()["elapsed"]}).} + \item{tasks_max}{Maximum number of tasks that a worker will do before exiting. See the \code{maxtasks} argument of \code{mirai::daemon()}. \code{crew} does not