diff --git a/404.html b/404.html index ee5e127..3178c21 100644 --- a/404.html +++ b/404.html @@ -21,7 +21,7 @@ syrup - 0.1.0.9000 + 0.1.1 diff --git a/LICENSE-text.html b/LICENSE-text.html index 93c7403..3cea2cb 100644 --- a/LICENSE-text.html +++ b/LICENSE-text.html @@ -7,7 +7,7 @@ syrup - 0.1.0.9000 + 0.1.1 diff --git a/LICENSE.html b/LICENSE.html index d6b4ba4..47cf37b 100644 --- a/LICENSE.html +++ b/LICENSE.html @@ -7,7 +7,7 @@ syrup - 0.1.0.9000 + 0.1.1 diff --git a/authors.html b/authors.html index 6645d81..8b0512a 100644 --- a/authors.html +++ b/authors.html @@ -7,7 +7,7 @@ syrup - 0.1.0.9000 + 0.1.1 @@ -49,13 +49,13 @@ Citation Couch S (2024). syrup: Measure Memory and CPU Usage for Parallel R Code. -R package version 0.1.0.9000, https://simonpcouch.github.io/syrup/, https://github.com/simonpcouch/syrup. +R package version 0.1.1, https://simonpcouch.github.io/syrup/, https://github.com/simonpcouch/syrup. @Manual{, title = {syrup: Measure Memory and CPU Usage for Parallel R Code}, author = {Simon Couch}, year = {2024}, - note = {R package version 0.1.0.9000, https://simonpcouch.github.io/syrup/}, + note = {R package version 0.1.1, https://simonpcouch.github.io/syrup/}, url = {https://github.com/simonpcouch/syrup}, } diff --git a/index.html b/index.html index 93fa161..0b6974c 100644 --- a/index.html +++ b/index.html @@ -23,7 +23,7 @@ syrup - 0.1.0.9000 + 0.1.1 diff --git a/news/index.html b/news/index.html index 2cef456..be16aa9 100644 --- a/news/index.html +++ b/news/index.html @@ -7,7 +7,7 @@ syrup - 0.1.0.9000 + 0.1.1 @@ -32,7 +32,7 @@ -syrup (development version) +syrup 0.1.1 Resolves CRAN test failures. diff --git a/pkgdown.yml b/pkgdown.yml index e476466..118ac14 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 3.1.11 pkgdown: 2.1.0 pkgdown_sha: ~ articles: {} -last_built: 2024-07-18T13:28Z +last_built: 2024-07-18T14:10Z urls: reference: https://simonpcouch.github.io/syrup/reference article: https://simonpcouch.github.io/syrup/articles diff --git a/reference/index.html b/reference/index.html index d7d33a9..e501f6a 100644 --- a/reference/index.html +++ b/reference/index.html @@ -7,7 +7,7 @@ syrup - 0.1.0.9000 + 0.1.1 diff --git a/reference/syrup-package.html b/reference/syrup-package.html index e70d6aa..24837df 100644 --- a/reference/syrup-package.html +++ b/reference/syrup-package.html @@ -9,7 +9,7 @@ syrup - 0.1.0.9000 + 0.1.1 diff --git a/reference/syrup.html b/reference/syrup.html index 942b711..883ca85 100644 --- a/reference/syrup.html +++ b/reference/syrup.html @@ -15,7 +15,7 @@ syrup - 0.1.0.9000 + 0.1.1 @@ -115,29 +115,28 @@ Examples#> # A tibble: 3 × 8 #> id time pid ppid name pct_cpu rss vms #> <dbl> <dttm> <int> <int> <chr> <dbl> <bch:byt> <bch:byt> -#> 1 1 2024-07-18 13:28:18 5943 1618 R NA 269MB 928MB -#> 2 2 2024-07-18 13:28:19 5943 1618 R 0 269MB 928MB -#> 3 3 2024-07-18 13:28:19 5943 1618 R 0 269MB 928MB +#> 1 1 2024-07-18 14:10:29 5744 1663 R NA 269MB 928MB +#> 2 2 2024-07-18 14:10:29 5744 1663 R 0 269MB 928MB +#> 3 3 2024-07-18 14:10:30 5744 1663 R 0 269MB 928MB # to snapshot memory and CPU information more (or less) often, set `interval` syrup(Sys.sleep(1), interval = .01) -#> # A tibble: 14 × 8 +#> # A tibble: 13 × 8 #> id time pid ppid name pct_cpu rss vms #> <dbl> <dttm> <int> <int> <chr> <dbl> <bch:byt> <bch:byt> -#> 1 1 2024-07-18 13:28:20 5943 1618 R NA 274MB 933MB -#> 2 2 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 3 3 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 4 4 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 5 5 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 6 6 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 7 7 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 8 8 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 9 9 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 10 10 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 11 11 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB -#> 12 12 2024-07-18 13:28:21 5943 1618 R 0 274MB 933MB -#> 13 13 2024-07-18 13:28:21 5943 1618 R 0 274MB 933MB -#> 14 14 2024-07-18 13:28:21 5943 1618 R 0 274MB 933MB +#> 1 1 2024-07-18 14:10:31 5744 1663 R NA 273MB 932MB +#> 2 2 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 3 3 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 4 4 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 5 5 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 6 6 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 7 7 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 8 8 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 9 9 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 10 10 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 11 11 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB +#> 12 12 2024-07-18 14:10:32 5744 1663 R 0 273MB 932MB +#> 13 13 2024-07-18 14:10:32 5744 1663 R 0 273MB 932MB # use `peak = TRUE` to return only the snapshot with # the highest memory usage (as `sum(rss)`) @@ -145,7 +144,7 @@ Examples#> # A tibble: 1 × 8 #> id time pid ppid name pct_cpu rss vms #> <dbl> <dttm> <int> <int> <chr> <dbl> <bch:byt> <bch:byt> -#> 1 1 2024-07-18 13:28:21 5943 1618 R NA 275MB 934MB +#> 1 1 2024-07-18 14:10:32 5744 1663 R NA 274MB 933MB # results from syrup are more---or maybe only---useful when # computations are evaluated in parallel. see package README diff --git a/search.json b/search.json index 24e41bd..12d050a 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"https://simonpcouch.github.io/syrup/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 Posit Software, PBC Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://simonpcouch.github.io/syrup/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Simon Couch. Author, maintainer. Posit Software, PBC. Copyright holder, funder.","code":""},{"path":"https://simonpcouch.github.io/syrup/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Couch S (2024). syrup: Measure Memory CPU Usage Parallel R Code. R package version 0.1.0.9000, https://simonpcouch.github.io/syrup/, https://github.com/simonpcouch/syrup.","code":"@Manual{, title = {syrup: Measure Memory and CPU Usage for Parallel R Code}, author = {Simon Couch}, year = {2024}, note = {R package version 0.1.0.9000, https://simonpcouch.github.io/syrup/}, url = {https://github.com/simonpcouch/syrup}, }"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"syrup-","dir":"","previous_headings":"","what":"Measure Memory and CPU Usage for Parallel R Code","title":"Measure Memory and CPU Usage for Parallel R Code","text":"goal syrup measure memory CPU usage R code regularly taking snapshots calls system command ps. package provides entry point (albeit coarse) profile usage system resources R code run parallel. package name homage syrupy (SYstem Resource Usage Profile …um, Yeah), Python tool jeetsukumaran/Syrupy.","code":""},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Measure Memory and CPU Usage for Parallel R Code","text":"Install latest release syrup CRAN like : can install development version syrup like :","code":"install.packages(\"syrup\") pak::pak(\"simonpcouch/syrup\")"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"example","dir":"","previous_headings":"","what":"Example","title":"Measure Memory and CPU Usage for Parallel R Code","text":"main function syrup package function name. main argument syrup() expression, function outputs tibble. Supplying rather boring expression: tibble, id defines specific time point process usage snapshotted, remaining columns show output derived ps::ps(). Notably, pid process ID, ppid process ID parent process, pct_cpu percent CPU usage, rss resident set size (measure memory usage). function works : Setting another R process sesh queries memory information regular interval, Evaluating supplied expression, Reading memory information back main process sesh, Closing sesh, Returning memory information.","code":"library(syrup) #> Loading required package: bench syrup(Sys.sleep(1)) #> # A tibble: 48 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-03 11:42:33 67101 60522 R NA 112MB 392GB #> 2 1 2024-07-03 11:42:33 60522 60300 rsession-arm64 NA 653MB 394GB #> 3 1 2024-07-03 11:42:33 58919 1 R NA 773MB 393GB #> 4 1 2024-07-03 11:42:33 97009 1 rsession-arm64 NA 128KB 394GB #> 5 1 2024-07-03 11:42:33 97008 1 rsession-arm64 NA 128KB 394GB #> 6 1 2024-07-03 11:42:33 97007 1 rsession-arm64 NA 240KB 394GB #> 7 1 2024-07-03 11:42:33 97006 1 rsession-arm64 NA 240KB 394GB #> 8 1 2024-07-03 11:42:33 97005 1 rsession-arm64 NA 128KB 394GB #> 9 1 2024-07-03 11:42:33 91012 1 R NA 128KB 393GB #> 10 1 2024-07-03 11:42:33 90999 1 R NA 128KB 393GB #> # ℹ 38 more rows"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"application-model-tuning","dir":"","previous_headings":"","what":"Application: model tuning","title":"Measure Memory and CPU Usage for Parallel R Code","text":"interesting demo, ’ll tune regularized linear model using cross-validation tidymodels. First, loading needed packages: Using future define parallelism strategy, ’ll set plan(multicore, workers = 5), indicating ’d like use forking 5 workers. default, future disables forking RStudio; know , context building README, usage forking safe, ’ll temporarily override default parallelly.fork.enable. Now, simulating data: call tune_grid() setup sequentially sending data five child processes actually carry model fitting. models fitted, data sent back parent process combined. better understand system resource usage throughout process, wrap call syrup(): results bit interesting sequential results Sys.sleep(1). Look closely ppids id; snapshot two, ’ll see five identical ppids id, ppids match remaining pid one remaining R process. shows us ’ve indeed distributed computations using forking one remaining R process, “parent,” spawned five child processes . can plot result get better sense memory usage processes changes time: first, parent process non-NA rss, tidymodels hasn’t sent data workers yet. , 5 workers receives data tidymodels begins fitting models. Eventually, workers returns results parent process, rss NA. parent process wraps computations completing evaluation expression, point syrup() returns. (Keep mind: memory weird. plot, total memory allotted parent session five workers ID simply sum rss values, memory shared among .) see another side story come together CPU usage: percent CPU usage always NA first time process ID seen, usage calculation based change since previous recorded value. soon ’re able start measuring, see workers 100% usage, parent process largely idle sent data workers.","code":"library(future) library(tidymodels) library(rlang) local_options(parallelly.fork.enable = TRUE) plan(multicore, workers = 5) set.seed(1) dat <- sim_regression(1000000) dat #> # A tibble: 1,000,000 × 21 #> outcome predictor_01 predictor_02 predictor_03 predictor_04 predictor_05 #> #> 1 3.63 -1.88 0.872 -0.799 -0.0379 2.68 #> 2 41.6 0.551 -2.47 2.37 3.90 5.18 #> 3 -6.99 -2.51 -3.15 2.61 2.13 3.08 #> 4 33.2 4.79 1.86 -2.37 4.27 -3.59 #> 5 34.3 0.989 -0.315 3.08 2.56 -5.91 #> 6 26.7 -2.46 -0.459 1.75 -5.24 5.04 #> 7 21.4 1.46 -0.674 -0.894 -3.91 -3.38 #> 8 21.7 2.21 1.28 -1.05 -0.561 2.99 #> 9 -8.84 1.73 0.0725 0.0976 5.40 4.30 #> 10 24.5 -0.916 -0.223 -0.561 -4.12 0.0508 #> # ℹ 999,990 more rows #> # ℹ 15 more variables: predictor_06 , predictor_07 , #> # predictor_08 , predictor_09 , predictor_10 , #> # predictor_11 , predictor_12 , predictor_13 , #> # predictor_14 , predictor_15 , predictor_16 , #> # predictor_17 , predictor_18 , predictor_19 , #> # predictor_20 res_mem <- syrup({ res <- tune_grid( linear_reg(engine = \"glmnet\", penalty = tune()), outcome ~ ., vfold_cv(dat) ) }) res_mem #> # A tibble: 158 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-03 11:42:38 67101 60522 R NA 1.05GB 393GB #> 2 1 2024-07-03 11:42:38 60522 60300 rsession-arm64 NA 653.44MB 394GB #> 3 1 2024-07-03 11:42:38 58919 1 R NA 838.56MB 393GB #> 4 1 2024-07-03 11:42:38 97009 1 rsession-arm64 NA 128KB 394GB #> 5 1 2024-07-03 11:42:38 97008 1 rsession-arm64 NA 128KB 394GB #> 6 1 2024-07-03 11:42:38 97007 1 rsession-arm64 NA 240KB 394GB #> 7 1 2024-07-03 11:42:38 97006 1 rsession-arm64 NA 240KB 394GB #> 8 1 2024-07-03 11:42:38 97005 1 rsession-arm64 NA 128KB 394GB #> 9 1 2024-07-03 11:42:38 91012 1 R NA 128KB 393GB #> 10 1 2024-07-03 11:42:38 90999 1 R NA 128KB 393GB #> # ℹ 148 more rows worker_ppid <- ps::ps_pid() res_mem %>% filter(ppid == worker_ppid | pid == worker_ppid) %>% ggplot() + aes(x = id, y = rss, group = pid) + geom_line() + scale_x_continuous(breaks = 1:max(res_mem$id)) res_mem %>% filter(ppid == worker_ppid | pid == worker_ppid) %>% ggplot() + aes(x = id, y = pct_cpu, group = pid) + geom_line() + scale_x_continuous(breaks = 1:max(res_mem$id))"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Measure Memory and CPU Usage for Parallel R Code","text":"much verbiage package assumes supplied expression distributed across CPU cores, ’s nothing specific package necessitates expression provided syrup() run parallel. Said another way, syrup work just fine “normal,” sequentially-run R code. said, many better, fine-grained tools job case sequential R code, Rprofmem(), profmem package, bench package, packages R-prof GitHub organization. Results syrup provide enough detail coarsest analyses memory CPU usage, provide entry point “profiling” system resource usage R code runs parallel.","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup-package.html","id":null,"dir":"Reference","previous_headings":"","what":"syrup: Measure Memory and CPU Usage for Parallel R Code — syrup-package","title":"syrup: Measure Memory and CPU Usage for Parallel R Code — syrup-package","text":"Measures memory CPU usage R code regularly taking snapshots calls system command 'ps'. package provides entry point (albeit coarse) profile usage system resources R code run parallel.","code":""},{"path":[]},{"path":"https://simonpcouch.github.io/syrup/reference/syrup-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"syrup: Measure Memory and CPU Usage for Parallel R Code — syrup-package","text":"Maintainer: Simon Couch simon.couch@posit.co (ORCID) contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":null,"dir":"Reference","previous_headings":"","what":"Memory and CPU Usage Information for Parallel R Code — syrup","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"function wrapper around system command ps can used benchmark (peak) memory CPU usage parallel R code. taking snapshots memory usage R processes regular interval, function dynamically builds profile usage system resources.","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"","code":"syrup(expr, interval = 0.5, peak = FALSE, env = caller_env())"},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"expr expression. interval interval take snapshots respirce usage. practice, overhead top intervals. peak Whether return rows \"peak\" memory usage. Interpreted id maximum rss sum. Defaults FALSE, may helpful set peak = TRUE potentially long-running processes tibble grow large. env environment evaluate expr .","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"tibble columns id time number columns ps::ps() output describing memory CPU usage. Notably, process ID pid, parent process ID ppid, percent CPU usage, resident set size rss (measure memory usage).","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"much verbiage package assumes supplied expression distributed across CPU cores, nothing specific package necessitates expression provided syrup() run parallel. Said another way, syrup() work just fine \"normal,\" sequentially-run R code (examples). said, many better, fine-grained tools job case sequential R code, Rprofmem(), profmem package, bench package, packages R-prof GitHub organization. Loosely, function works : Setting another R process (call sesh) queries system information using ps::ps() regular interval, Evaluating supplied expression, Reading queried system information back main process sesh, Closing sesh, Returning queried system information. Note information R process sesh filtered results automatically.","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"","code":"# pass any expression to syrup. first, sequentially: res_syrup <- syrup({res_output <- Sys.sleep(1)}) res_syrup #> # A tibble: 3 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-18 13:28:18 5943 1618 R NA 269MB 928MB #> 2 2 2024-07-18 13:28:19 5943 1618 R 0 269MB 928MB #> 3 3 2024-07-18 13:28:19 5943 1618 R 0 269MB 928MB # to snapshot memory and CPU information more (or less) often, set `interval` syrup(Sys.sleep(1), interval = .01) #> # A tibble: 14 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-18 13:28:20 5943 1618 R NA 274MB 933MB #> 2 2 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 3 3 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 4 4 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 5 5 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 6 6 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 7 7 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 8 8 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 9 9 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 10 10 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 11 11 2024-07-18 13:28:20 5943 1618 R 0 274MB 933MB #> 12 12 2024-07-18 13:28:21 5943 1618 R 0 274MB 933MB #> 13 13 2024-07-18 13:28:21 5943 1618 R 0 274MB 933MB #> 14 14 2024-07-18 13:28:21 5943 1618 R 0 274MB 933MB # use `peak = TRUE` to return only the snapshot with # the highest memory usage (as `sum(rss)`) syrup(Sys.sleep(1), interval = .01, peak = TRUE) #> # A tibble: 1 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-18 13:28:21 5943 1618 R NA 275MB 934MB # results from syrup are more---or maybe only---useful when # computations are evaluated in parallel. see package README # for an example."},{"path":"https://simonpcouch.github.io/syrup/news/index.html","id":"syrup-development-version","dir":"Changelog","previous_headings":"","what":"syrup (development version)","title":"syrup (development version)","text":"Resolves CRAN test failures.","code":""},{"path":"https://simonpcouch.github.io/syrup/news/index.html","id":"syrup-010","dir":"Changelog","previous_headings":"","what":"syrup 0.1.0","title":"syrup 0.1.0","text":"CRAN release: 2024-07-09 Initial CRAN submission.","code":""}] +[{"path":"https://simonpcouch.github.io/syrup/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 Posit Software, PBC Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://simonpcouch.github.io/syrup/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Simon Couch. Author, maintainer. Posit Software, PBC. Copyright holder, funder.","code":""},{"path":"https://simonpcouch.github.io/syrup/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Couch S (2024). syrup: Measure Memory CPU Usage Parallel R Code. R package version 0.1.1, https://simonpcouch.github.io/syrup/, https://github.com/simonpcouch/syrup.","code":"@Manual{, title = {syrup: Measure Memory and CPU Usage for Parallel R Code}, author = {Simon Couch}, year = {2024}, note = {R package version 0.1.1, https://simonpcouch.github.io/syrup/}, url = {https://github.com/simonpcouch/syrup}, }"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"syrup-","dir":"","previous_headings":"","what":"Measure Memory and CPU Usage for Parallel R Code","title":"Measure Memory and CPU Usage for Parallel R Code","text":"goal syrup measure memory CPU usage R code regularly taking snapshots calls system command ps. package provides entry point (albeit coarse) profile usage system resources R code run parallel. package name homage syrupy (SYstem Resource Usage Profile …um, Yeah), Python tool jeetsukumaran/Syrupy.","code":""},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Measure Memory and CPU Usage for Parallel R Code","text":"Install latest release syrup CRAN like : can install development version syrup like :","code":"install.packages(\"syrup\") pak::pak(\"simonpcouch/syrup\")"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"example","dir":"","previous_headings":"","what":"Example","title":"Measure Memory and CPU Usage for Parallel R Code","text":"main function syrup package function name. main argument syrup() expression, function outputs tibble. Supplying rather boring expression: tibble, id defines specific time point process usage snapshotted, remaining columns show output derived ps::ps(). Notably, pid process ID, ppid process ID parent process, pct_cpu percent CPU usage, rss resident set size (measure memory usage). function works : Setting another R process sesh queries memory information regular interval, Evaluating supplied expression, Reading memory information back main process sesh, Closing sesh, Returning memory information.","code":"library(syrup) #> Loading required package: bench syrup(Sys.sleep(1)) #> # A tibble: 48 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-03 11:42:33 67101 60522 R NA 112MB 392GB #> 2 1 2024-07-03 11:42:33 60522 60300 rsession-arm64 NA 653MB 394GB #> 3 1 2024-07-03 11:42:33 58919 1 R NA 773MB 393GB #> 4 1 2024-07-03 11:42:33 97009 1 rsession-arm64 NA 128KB 394GB #> 5 1 2024-07-03 11:42:33 97008 1 rsession-arm64 NA 128KB 394GB #> 6 1 2024-07-03 11:42:33 97007 1 rsession-arm64 NA 240KB 394GB #> 7 1 2024-07-03 11:42:33 97006 1 rsession-arm64 NA 240KB 394GB #> 8 1 2024-07-03 11:42:33 97005 1 rsession-arm64 NA 128KB 394GB #> 9 1 2024-07-03 11:42:33 91012 1 R NA 128KB 393GB #> 10 1 2024-07-03 11:42:33 90999 1 R NA 128KB 393GB #> # ℹ 38 more rows"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"application-model-tuning","dir":"","previous_headings":"","what":"Application: model tuning","title":"Measure Memory and CPU Usage for Parallel R Code","text":"interesting demo, ’ll tune regularized linear model using cross-validation tidymodels. First, loading needed packages: Using future define parallelism strategy, ’ll set plan(multicore, workers = 5), indicating ’d like use forking 5 workers. default, future disables forking RStudio; know , context building README, usage forking safe, ’ll temporarily override default parallelly.fork.enable. Now, simulating data: call tune_grid() setup sequentially sending data five child processes actually carry model fitting. models fitted, data sent back parent process combined. better understand system resource usage throughout process, wrap call syrup(): results bit interesting sequential results Sys.sleep(1). Look closely ppids id; snapshot two, ’ll see five identical ppids id, ppids match remaining pid one remaining R process. shows us ’ve indeed distributed computations using forking one remaining R process, “parent,” spawned five child processes . can plot result get better sense memory usage processes changes time: first, parent process non-NA rss, tidymodels hasn’t sent data workers yet. , 5 workers receives data tidymodels begins fitting models. Eventually, workers returns results parent process, rss NA. parent process wraps computations completing evaluation expression, point syrup() returns. (Keep mind: memory weird. plot, total memory allotted parent session five workers ID simply sum rss values, memory shared among .) see another side story come together CPU usage: percent CPU usage always NA first time process ID seen, usage calculation based change since previous recorded value. soon ’re able start measuring, see workers 100% usage, parent process largely idle sent data workers.","code":"library(future) library(tidymodels) library(rlang) local_options(parallelly.fork.enable = TRUE) plan(multicore, workers = 5) set.seed(1) dat <- sim_regression(1000000) dat #> # A tibble: 1,000,000 × 21 #> outcome predictor_01 predictor_02 predictor_03 predictor_04 predictor_05 #> #> 1 3.63 -1.88 0.872 -0.799 -0.0379 2.68 #> 2 41.6 0.551 -2.47 2.37 3.90 5.18 #> 3 -6.99 -2.51 -3.15 2.61 2.13 3.08 #> 4 33.2 4.79 1.86 -2.37 4.27 -3.59 #> 5 34.3 0.989 -0.315 3.08 2.56 -5.91 #> 6 26.7 -2.46 -0.459 1.75 -5.24 5.04 #> 7 21.4 1.46 -0.674 -0.894 -3.91 -3.38 #> 8 21.7 2.21 1.28 -1.05 -0.561 2.99 #> 9 -8.84 1.73 0.0725 0.0976 5.40 4.30 #> 10 24.5 -0.916 -0.223 -0.561 -4.12 0.0508 #> # ℹ 999,990 more rows #> # ℹ 15 more variables: predictor_06 , predictor_07 , #> # predictor_08 , predictor_09 , predictor_10 , #> # predictor_11 , predictor_12 , predictor_13 , #> # predictor_14 , predictor_15 , predictor_16 , #> # predictor_17 , predictor_18 , predictor_19 , #> # predictor_20 res_mem <- syrup({ res <- tune_grid( linear_reg(engine = \"glmnet\", penalty = tune()), outcome ~ ., vfold_cv(dat) ) }) res_mem #> # A tibble: 158 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-03 11:42:38 67101 60522 R NA 1.05GB 393GB #> 2 1 2024-07-03 11:42:38 60522 60300 rsession-arm64 NA 653.44MB 394GB #> 3 1 2024-07-03 11:42:38 58919 1 R NA 838.56MB 393GB #> 4 1 2024-07-03 11:42:38 97009 1 rsession-arm64 NA 128KB 394GB #> 5 1 2024-07-03 11:42:38 97008 1 rsession-arm64 NA 128KB 394GB #> 6 1 2024-07-03 11:42:38 97007 1 rsession-arm64 NA 240KB 394GB #> 7 1 2024-07-03 11:42:38 97006 1 rsession-arm64 NA 240KB 394GB #> 8 1 2024-07-03 11:42:38 97005 1 rsession-arm64 NA 128KB 394GB #> 9 1 2024-07-03 11:42:38 91012 1 R NA 128KB 393GB #> 10 1 2024-07-03 11:42:38 90999 1 R NA 128KB 393GB #> # ℹ 148 more rows worker_ppid <- ps::ps_pid() res_mem %>% filter(ppid == worker_ppid | pid == worker_ppid) %>% ggplot() + aes(x = id, y = rss, group = pid) + geom_line() + scale_x_continuous(breaks = 1:max(res_mem$id)) res_mem %>% filter(ppid == worker_ppid | pid == worker_ppid) %>% ggplot() + aes(x = id, y = pct_cpu, group = pid) + geom_line() + scale_x_continuous(breaks = 1:max(res_mem$id))"},{"path":"https://simonpcouch.github.io/syrup/index.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Measure Memory and CPU Usage for Parallel R Code","text":"much verbiage package assumes supplied expression distributed across CPU cores, ’s nothing specific package necessitates expression provided syrup() run parallel. Said another way, syrup work just fine “normal,” sequentially-run R code. said, many better, fine-grained tools job case sequential R code, Rprofmem(), profmem package, bench package, packages R-prof GitHub organization. Results syrup provide enough detail coarsest analyses memory CPU usage, provide entry point “profiling” system resource usage R code runs parallel.","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup-package.html","id":null,"dir":"Reference","previous_headings":"","what":"syrup: Measure Memory and CPU Usage for Parallel R Code — syrup-package","title":"syrup: Measure Memory and CPU Usage for Parallel R Code — syrup-package","text":"Measures memory CPU usage R code regularly taking snapshots calls system command 'ps'. package provides entry point (albeit coarse) profile usage system resources R code run parallel.","code":""},{"path":[]},{"path":"https://simonpcouch.github.io/syrup/reference/syrup-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"syrup: Measure Memory and CPU Usage for Parallel R Code — syrup-package","text":"Maintainer: Simon Couch simon.couch@posit.co (ORCID) contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":null,"dir":"Reference","previous_headings":"","what":"Memory and CPU Usage Information for Parallel R Code — syrup","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"function wrapper around system command ps can used benchmark (peak) memory CPU usage parallel R code. taking snapshots memory usage R processes regular interval, function dynamically builds profile usage system resources.","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"","code":"syrup(expr, interval = 0.5, peak = FALSE, env = caller_env())"},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"expr expression. interval interval take snapshots respirce usage. practice, overhead top intervals. peak Whether return rows \"peak\" memory usage. Interpreted id maximum rss sum. Defaults FALSE, may helpful set peak = TRUE potentially long-running processes tibble grow large. env environment evaluate expr .","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"tibble columns id time number columns ps::ps() output describing memory CPU usage. Notably, process ID pid, parent process ID ppid, percent CPU usage, resident set size rss (measure memory usage).","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"much verbiage package assumes supplied expression distributed across CPU cores, nothing specific package necessitates expression provided syrup() run parallel. Said another way, syrup() work just fine \"normal,\" sequentially-run R code (examples). said, many better, fine-grained tools job case sequential R code, Rprofmem(), profmem package, bench package, packages R-prof GitHub organization. Loosely, function works : Setting another R process (call sesh) queries system information using ps::ps() regular interval, Evaluating supplied expression, Reading queried system information back main process sesh, Closing sesh, Returning queried system information. Note information R process sesh filtered results automatically.","code":""},{"path":"https://simonpcouch.github.io/syrup/reference/syrup.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Memory and CPU Usage Information for Parallel R Code — syrup","text":"","code":"# pass any expression to syrup. first, sequentially: res_syrup <- syrup({res_output <- Sys.sleep(1)}) res_syrup #> # A tibble: 3 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-18 14:10:29 5744 1663 R NA 269MB 928MB #> 2 2 2024-07-18 14:10:29 5744 1663 R 0 269MB 928MB #> 3 3 2024-07-18 14:10:30 5744 1663 R 0 269MB 928MB # to snapshot memory and CPU information more (or less) often, set `interval` syrup(Sys.sleep(1), interval = .01) #> # A tibble: 13 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-18 14:10:31 5744 1663 R NA 273MB 932MB #> 2 2 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 3 3 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 4 4 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 5 5 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 6 6 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 7 7 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 8 8 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 9 9 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 10 10 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 11 11 2024-07-18 14:10:31 5744 1663 R 0 273MB 932MB #> 12 12 2024-07-18 14:10:32 5744 1663 R 0 273MB 932MB #> 13 13 2024-07-18 14:10:32 5744 1663 R 0 273MB 932MB # use `peak = TRUE` to return only the snapshot with # the highest memory usage (as `sum(rss)`) syrup(Sys.sleep(1), interval = .01, peak = TRUE) #> # A tibble: 1 × 8 #> id time pid ppid name pct_cpu rss vms #> #> 1 1 2024-07-18 14:10:32 5744 1663 R NA 274MB 933MB # results from syrup are more---or maybe only---useful when # computations are evaluated in parallel. see package README # for an example."},{"path":"https://simonpcouch.github.io/syrup/news/index.html","id":"syrup-011","dir":"Changelog","previous_headings":"","what":"syrup 0.1.1","title":"syrup 0.1.1","text":"Resolves CRAN test failures.","code":""},{"path":"https://simonpcouch.github.io/syrup/news/index.html","id":"syrup-010","dir":"Changelog","previous_headings":"","what":"syrup 0.1.0","title":"syrup 0.1.0","text":"CRAN release: 2024-07-09 Initial CRAN submission.","code":""}]
Couch S (2024). syrup: Measure Memory and CPU Usage for Parallel R Code. -R package version 0.1.0.9000, https://simonpcouch.github.io/syrup/, https://github.com/simonpcouch/syrup. +R package version 0.1.1, https://simonpcouch.github.io/syrup/, https://github.com/simonpcouch/syrup.
@Manual{, title = {syrup: Measure Memory and CPU Usage for Parallel R Code}, author = {Simon Couch}, year = {2024}, - note = {R package version 0.1.0.9000, https://simonpcouch.github.io/syrup/}, + note = {R package version 0.1.1, https://simonpcouch.github.io/syrup/}, url = {https://github.com/simonpcouch/syrup}, }