From c69543dc5fac6c4080bb1c631bd956ad65f3dab3 Mon Sep 17 00:00:00 2001 From: Sam Abbott Date: Tue, 17 Sep 2024 17:30:30 +0100 Subject: [PATCH] Issue 36: Add benchmarking of R, Stan, and fitdistdoublecens (#79) * add touchstone GH actions * add touchstone infra * add initial content * fix touchstone * install all dependencies * install cmdstanr * fix YAML * pass cmdstanr repo to touchstone * simplify benchmarking * move setup.R content into main script * add a news item * tune benchmark setttings --- .Rbuildignore | 1 + .github/workflows/benchmarks-comment.yaml | 20 ++ .github/workflows/benchmarks.yaml | 81 +++++++ NEWS.md | 4 + touchstone/.gitignore | 6 + touchstone/config.json | 5 + touchstone/footer.R | 10 + touchstone/header.R | 13 ++ touchstone/script.R | 265 ++++++++++++++++++++++ 9 files changed, 405 insertions(+) create mode 100644 .github/workflows/benchmarks-comment.yaml create mode 100644 .github/workflows/benchmarks.yaml create mode 100644 touchstone/.gitignore create mode 100644 touchstone/config.json create mode 100644 touchstone/footer.R create mode 100644 touchstone/header.R create mode 100644 touchstone/script.R diff --git a/.Rbuildignore b/.Rbuildignore index c8dc64f..40321ac 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,3 +25,4 @@ ^vignettes/using-stan-tools\.Rmd$ ^vignettes/fitting-dists-with-stan\.Rmd$ ^inst/pcens_model$ +^touchstone$ \ No newline at end of file diff --git a/.github/workflows/benchmarks-comment.yaml b/.github/workflows/benchmarks-comment.yaml new file mode 100644 index 0000000..c901652 --- /dev/null +++ b/.github/workflows/benchmarks-comment.yaml @@ -0,0 +1,20 @@ +name: Benchmarks (Comment) + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +on: + workflow_run: + workflows: + - Benchmarks + types: + - completed + +jobs: + upload: + runs-on: ubuntu-latest + steps: + - uses: lorenzwalthert/touchstone/actions/comment@main + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml new file mode 100644 index 0000000..5baf6ad --- /dev/null +++ b/.github/workflows/benchmarks.yaml @@ -0,0 +1,81 @@ +name: Benchmarks + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +on: + pull_request: + paths: + # Directories with source code and benchmarking code + - "inst/stan/**" + - "R/**" + - "src/**" + - "touchstone/**" + # Stan model + - "inst/pcens_model.stan" + # Benchmarking config file + - ".github/workflows/benchmarks*.yaml" + # Benchmarking action + - ".github/actions/touchstone-recieve/**" + # Package metadata + - DESCRIPTION + workflow_dispatch: + +jobs: + prepare: + runs-on: ubuntu-latest + outputs: + config: ${{ steps.read_touchstone_config.outputs.config }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - id: read_touchstone_config + run: | + content=`cat ./touchstone/config.json` + # the following lines are only required for multi line json + content="${content//'%'/'%25'}" + content="${content//$'\n'/'%0A'}" + content="${content//$'\r'/'%0D'}" + # end of optional handling for multi line json + echo "::set-output name=config::$content" + build: + needs: prepare + runs-on: ${{ matrix.config.os }} + strategy: + fail-fast: false + matrix: + config: + - ${{ fromJson(needs.prepare.outputs.config) }} + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + extra-repositories: 'https://stan-dev.r-universe.dev' + + - name: Install cmdstanr + run: install.packages("cmdstanr") + shell: Rscript {0} + + - name: Install cmdstan + uses: epinowcast/actions/install-cmdstan@v1 + with: + cmdstan-version: 'latest' + num-cores: 2 + + - uses: seabbs/touchstone/actions/receive@main + with: + cache-version: 1 + touchstone_ref: '@f5c859e' + benchmarking_repo: ${{ matrix.config.benchmarking_repo }} + benchmarking_ref: ${{ matrix.config.benchmarking_ref }} + benchmarking_path: ${{ matrix.config.benchmarking_path }} + extra-repositories: 'https://stan-dev.r-universe.dev' diff --git a/NEWS.md b/NEWS.md index fa22f4d..166c069 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ This is the current development version. +## Package + +* Add `{touchstone}` based benchmarks for benchmarking R utility functions, and fitting the `stan` and `fitdistplus` models. + # primarycensoreddist 0.4.0 In this release, we have added a new package `stan` model for fitting distributions using the `cmdstanr` package. We have also added a new function `fitdistdoublecens()` to allow for fitting of double censored and truncated data using the `fitdistrplus` package. As well as these functionality improvements this release focuses on improving the stability of the `stan` model and improving the speed of the `primary_censored_ode` function. diff --git a/touchstone/.gitignore b/touchstone/.gitignore new file mode 100644 index 0000000..ee8eacf --- /dev/null +++ b/touchstone/.gitignore @@ -0,0 +1,6 @@ +* +!script.R +!config.json +!.gitignore +!header.R +!footer.R diff --git a/touchstone/config.json b/touchstone/config.json new file mode 100644 index 0000000..d75e7ab --- /dev/null +++ b/touchstone/config.json @@ -0,0 +1,5 @@ +{ + "os": "ubuntu-latest", + "r": "release", + "rspm": "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" +} \ No newline at end of file diff --git a/touchstone/footer.R b/touchstone/footer.R new file mode 100644 index 0000000..4e7494c --- /dev/null +++ b/touchstone/footer.R @@ -0,0 +1,10 @@ +# You can modify the PR comment footer here. You can use github markdown e.g. +# emojis like :tada:. +# This file will be parsed and evaluate within the context of +# `benchmark_analyze` and should return the comment text as the last value. +# See `?touchstone::pr_comment` +link <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html" +glue::glue( + "\nFurther explanation regarding interpretation", + " and methodology can be found in the [documentation of `touchstone`]({link})." # nolint +) diff --git a/touchstone/header.R b/touchstone/header.R new file mode 100644 index 0000000..cb1c36f --- /dev/null +++ b/touchstone/header.R @@ -0,0 +1,13 @@ +# You can modify the PR comment header here. You can use github markdown e.g. +# emojis like :tada:. +# This file will be parsed and evaluate within the context of +# `benchmark_analyze` and should return the comment text as the last value. +# Available variables for glue substitution: +# * ci: confidence interval +# * branches: BASE and HEAD branches benchmarked against each other. +# See `?touchstone::pr_comment` +glue::glue( + "This is how benchmark results would change (along with a", + " {100 * ci}% confidence interval in relative change) if ", + "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n" # nolint +) diff --git a/touchstone/script.R b/touchstone/script.R new file mode 100644 index 0000000..1b7b637 --- /dev/null +++ b/touchstone/script.R @@ -0,0 +1,265 @@ +# see `help(run_script, package = 'touchstone')` on how to run this +# interactively + +# installs branches to benchmark +touchstone::branch_install() + +# Benchmark for pprimarycensoreddist with lognormal distribution +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + q <- seq(0, 10, by = 0.01) + }, + pprimarycensoreddist_lnorm = { + pprimarycensoreddist(q, plnorm, meanlog = 0, sdlog = 1, D = 12) + }, + n = 20 +) + +# Benchmark for pprimarycensoreddist with exponential growth +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + q <- seq(0, 10, by = 0.01) + }, + pprimarycensoreddist_expgrowth = { + pprimarycensoreddist( + q, plnorm, + dprimary = dexpgrowth, + dprimary_args = list(r = 0.2), + meanlog = 0, sdlog = 1, D = 12 + ) + }, + n = 20 +) + +# Benchmark for dprimarycensoreddist with Weibull distribution +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + x <- seq(0, 10, by = 1) + }, + dprimarycensoreddist_weibull = { + dprimarycensoreddist(x, pweibull, shape = 1.5, scale = 2.0, D = 12) + }, + n = 20 +) + +# Benchmark for dprimarycensoreddist with exponential growth +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + x <- seq(0, 10, by = 1) + }, + dprimarycensoreddist_expgrowth = { + dprimarycensoreddist( + x, pweibull, + dprimary = dexpgrowth, + dprimary_args = list(r = 0.2), + shape = 1.5, scale = 2.0, D = 12 + ) + }, + n = 20 +) + +# Benchmark for fitdistdoublecens with normal distribution +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + library(fitdistrplus) + set.seed(123) + n <- 1000 + true_mean <- 5 + true_sd <- 2 + pwindow <- 1 + swindow <- 1 + D <- 10 + samples <- rprimarycensoreddist( + n, rnorm, + mean = true_mean, sd = true_sd, + pwindow = pwindow, swindow = swindow, D = D + ) + delay_data <- data.frame( + left = samples, + right = samples + swindow + ) + }, + fitdistdoublecens_normal = { + fitdistdoublecens( + delay_data, + distr = "norm", + start = list(mean = 0, sd = 1), + D = D, + pwindow = pwindow + ) + }, + n = 10 +) + +# Benchmark for fitdistdoublecens with exponential growth +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + library(fitdistrplus) + set.seed(456) + n <- 1000 + true_shape <- 2 + true_rate <- 0.5 + pwindow <- 1 + swindow <- 1 + D <- 8 + samples <- rprimarycensoreddist( + n, rgamma, + shape = true_shape, rate = true_rate, + pwindow = pwindow, swindow = swindow, D = D, + ) + delay_data <- data.frame( + left = samples, + right = samples + swindow + ) + }, + fitdistdoublecens_gamma = { + fitdistdoublecens( + delay_data, + distr = "gamma", + start = list(shape = 1, rate = 1), + D = D, + pwindow = pwindow + ) + }, + n = 10 +) + +# Benchmark for fitting lognormal distribution +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + library(dplyr) + library(cmdstanr) + + cmdstanr::set_cmdstan_path() + options(mc.cores = 2) + + set.seed(123) + n1 <- 2000 + true_meanlog1 <- 1.5 + true_sdlog1 <- 0.5 + + simulated_delays1 <- rprimarycensoreddist( + n = n1, + rdist = rlnorm, + meanlog = true_meanlog1, + sdlog = true_sdlog1, + pwindow = 1, + D = 10 + ) + + example_data1 <- data.frame( + delay = simulated_delays1, + delay_upper = simulated_delays1 + 1, + pwindow = 1, + relative_obs_time = 10 + ) + + delay_counts1 <- example_data1 |> + summarise( + n = n(), + .by = c(pwindow, relative_obs_time, delay, delay_upper) + ) + + stan_data1 <- pcd_as_cmdstan_data( + delay_counts1, + dist_id = 1, + primary_dist_id = 1, + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) + ) + + model <- suppressMessages(suppressWarnings(pcd_cmdstan_model())) + }, + cmdstan_fit_lognormal = { + fit1 <- model$sample( + data = stan_data1, + seed = 123, + chains = 2, + parallel_chains = 2, + refresh = 0, + show_messages = FALSE, + iter_warmup = 500, + iter_sampling = 500 + ) + }, + n = 10 +) + +# Benchmark for fitting gamma distribution +touchstone::benchmark_run( + expr_before_benchmark = { + library(primarycensoreddist) + library(dplyr) + library(cmdstanr) + + cmdstanr::set_cmdstan_path() + options(mc.cores = 2) + + set.seed(456) + n2 <- 2000 + true_shape2 <- 2 + true_rate2 <- 0.5 + + simulated_delays2 <- rprimarycensoreddist( + n = n2, + rdist = rgamma, + shape = true_shape2, + rate = true_rate2, + pwindow = 2, + swindow = 2, + D = 8, + rprimary = rexpgrowth, + rprimary_args = list(r = 0.1) + ) + + example_data2 <- data.frame( + delay = simulated_delays2, + delay_upper = simulated_delays2 + 2, + pwindow = 2, + relative_obs_time = 8 + ) + + delay_counts2 <- example_data2 |> + summarise( + n = n(), + .by = c(pwindow, relative_obs_time, delay, delay_upper) + ) + + stan_data2 <- pcd_as_cmdstan_data( + delay_counts2, + dist_id = 2, + primary_dist_id = 2, + param_bounds = list(lower = c(0, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = 0, upper = Inf), + priors = list(location = c(2, 1), scale = c(0.5, 0.5)), + primary_priors = list(location = 0.1, scale = 0.1) + ) + + model <- suppressMessages(suppressWarnings(pcd_cmdstan_model())) + }, + cmdstan_fit_gamma = { + fit2 <- model$sample( + data = stan_data2, + seed = 456, + chains = 2, + parallel_chains = 2, + refresh = 0, + show_messages = FALSE, + iter_warmup = 500, + iter_sampling = 500 + ) + }, + n = 10 +) + +# create artifacts used downstream in the GitHub Action. +touchstone::benchmark_analyze()