From c69543dc5fac6c4080bb1c631bd956ad65f3dab3 Mon Sep 17 00:00:00 2001
From: Sam Abbott <s.e.abbott12@gmail.com>
Date: Tue, 17 Sep 2024 17:30:30 +0100
Subject: [PATCH] Issue 36: Add benchmarking of R, Stan, and fitdistdoublecens
 (#79)

* add touchstone GH actions

* add touchstone infra

* add initial content

* fix touchstone

* install all dependencies

* install cmdstanr

* fix YAML

* pass cmdstanr repo to touchstone

* simplify benchmarking

* move setup.R content into main script

* add a news item

* tune benchmark setttings
---
 .Rbuildignore                             |   1 +
 .github/workflows/benchmarks-comment.yaml |  20 ++
 .github/workflows/benchmarks.yaml         |  81 +++++++
 NEWS.md                                   |   4 +
 touchstone/.gitignore                     |   6 +
 touchstone/config.json                    |   5 +
 touchstone/footer.R                       |  10 +
 touchstone/header.R                       |  13 ++
 touchstone/script.R                       | 265 ++++++++++++++++++++++
 9 files changed, 405 insertions(+)
 create mode 100644 .github/workflows/benchmarks-comment.yaml
 create mode 100644 .github/workflows/benchmarks.yaml
 create mode 100644 touchstone/.gitignore
 create mode 100644 touchstone/config.json
 create mode 100644 touchstone/footer.R
 create mode 100644 touchstone/header.R
 create mode 100644 touchstone/script.R

diff --git a/.Rbuildignore b/.Rbuildignore
index c8dc64f..40321ac 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -25,3 +25,4 @@
 ^vignettes/using-stan-tools\.Rmd$
 ^vignettes/fitting-dists-with-stan\.Rmd$
 ^inst/pcens_model$
+^touchstone$
\ No newline at end of file
diff --git a/.github/workflows/benchmarks-comment.yaml b/.github/workflows/benchmarks-comment.yaml
new file mode 100644
index 0000000..c901652
--- /dev/null
+++ b/.github/workflows/benchmarks-comment.yaml
@@ -0,0 +1,20 @@
+name: Benchmarks (Comment)
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}
+  cancel-in-progress: true
+
+on:
+  workflow_run:
+    workflows:
+      - Benchmarks
+    types:
+      - completed
+
+jobs:
+  upload:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: lorenzwalthert/touchstone/actions/comment@main
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml
new file mode 100644
index 0000000..5baf6ad
--- /dev/null
+++ b/.github/workflows/benchmarks.yaml
@@ -0,0 +1,81 @@
+name: Benchmarks
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}
+  cancel-in-progress: true
+
+on:
+  pull_request:
+    paths:
+      # Directories with source code and benchmarking code
+      - "inst/stan/**"
+      - "R/**"
+      - "src/**"
+      - "touchstone/**"
+      # Stan model
+      - "inst/pcens_model.stan"
+      # Benchmarking config file
+      - ".github/workflows/benchmarks*.yaml"
+      # Benchmarking action
+      - ".github/actions/touchstone-recieve/**"
+      # Package metadata
+      - DESCRIPTION
+  workflow_dispatch:
+
+jobs:
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      config: ${{ steps.read_touchstone_config.outputs.config }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - id: read_touchstone_config
+        run: |
+          content=`cat ./touchstone/config.json`
+          # the following lines are only required for multi line json
+          content="${content//'%'/'%25'}"
+          content="${content//$'\n'/'%0A'}"
+          content="${content//$'\r'/'%0D'}"
+          # end of optional handling for multi line json
+          echo "::set-output name=config::$content"
+  build:
+    needs: prepare
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - ${{ fromJson(needs.prepare.outputs.config) }}
+    env:
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      RSPM: ${{ matrix.config.rspm }}
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+          extra-repositories: 'https://stan-dev.r-universe.dev'
+
+      - name: Install cmdstanr
+        run: install.packages("cmdstanr")
+        shell: Rscript {0}
+
+      - name: Install cmdstan
+        uses: epinowcast/actions/install-cmdstan@v1
+        with:
+          cmdstan-version: 'latest'
+          num-cores: 2
+
+      - uses: seabbs/touchstone/actions/receive@main
+        with:
+          cache-version: 1
+          touchstone_ref: '@f5c859e'
+          benchmarking_repo: ${{ matrix.config.benchmarking_repo }}
+          benchmarking_ref: ${{ matrix.config.benchmarking_ref }}
+          benchmarking_path: ${{ matrix.config.benchmarking_path }}
+          extra-repositories: 'https://stan-dev.r-universe.dev'
diff --git a/NEWS.md b/NEWS.md
index fa22f4d..166c069 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,10 @@
 
 This is the current development version.
 
+## Package
+
+* Add `{touchstone}` based benchmarks for benchmarking R utility functions, and fitting the `stan` and `fitdistplus` models.
+
 # primarycensoreddist 0.4.0
 
 In this release, we have added a new package `stan` model for fitting distributions using the `cmdstanr` package. We have also added a new function `fitdistdoublecens()` to allow for fitting of double censored and truncated data using the `fitdistrplus` package. As well as these functionality improvements this release focuses on improving the stability of the `stan` model and improving the speed of the `primary_censored_ode` function.
diff --git a/touchstone/.gitignore b/touchstone/.gitignore
new file mode 100644
index 0000000..ee8eacf
--- /dev/null
+++ b/touchstone/.gitignore
@@ -0,0 +1,6 @@
+*
+!script.R
+!config.json
+!.gitignore
+!header.R
+!footer.R
diff --git a/touchstone/config.json b/touchstone/config.json
new file mode 100644
index 0000000..d75e7ab
--- /dev/null
+++ b/touchstone/config.json
@@ -0,0 +1,5 @@
+{
+  "os": "ubuntu-latest",
+  "r": "release",
+  "rspm": "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+}
\ No newline at end of file
diff --git a/touchstone/footer.R b/touchstone/footer.R
new file mode 100644
index 0000000..4e7494c
--- /dev/null
+++ b/touchstone/footer.R
@@ -0,0 +1,10 @@
+# You can modify the PR comment footer here. You can use github markdown e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluate within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# See `?touchstone::pr_comment`
+link <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html"
+glue::glue(
+  "\nFurther explanation regarding interpretation",
+  " and methodology can be found in the [documentation of `touchstone`]({link})." # nolint
+)
diff --git a/touchstone/header.R b/touchstone/header.R
new file mode 100644
index 0000000..cb1c36f
--- /dev/null
+++ b/touchstone/header.R
@@ -0,0 +1,13 @@
+# You can modify the PR comment header here. You can use github markdown e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluate within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# Available variables for glue substitution:
+# * ci: confidence interval
+# * branches: BASE and HEAD branches benchmarked against each other.
+# See `?touchstone::pr_comment`
+glue::glue(
+  "This is how benchmark results would change (along with a",
+  " {100 * ci}% confidence interval in relative change) if ",
+  "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n" # nolint
+)
diff --git a/touchstone/script.R b/touchstone/script.R
new file mode 100644
index 0000000..1b7b637
--- /dev/null
+++ b/touchstone/script.R
@@ -0,0 +1,265 @@
+# see `help(run_script, package = 'touchstone')` on how to run this
+# interactively
+
+# installs branches to benchmark
+touchstone::branch_install()
+
+# Benchmark for pprimarycensoreddist with lognormal distribution
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    q <- seq(0, 10, by = 0.01)
+  },
+  pprimarycensoreddist_lnorm = {
+    pprimarycensoreddist(q, plnorm, meanlog = 0, sdlog = 1, D = 12)
+  },
+  n = 20
+)
+
+# Benchmark for pprimarycensoreddist with exponential growth
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    q <- seq(0, 10, by = 0.01)
+  },
+  pprimarycensoreddist_expgrowth = {
+    pprimarycensoreddist(
+      q, plnorm,
+      dprimary = dexpgrowth,
+      dprimary_args = list(r = 0.2),
+      meanlog = 0, sdlog = 1, D = 12
+    )
+  },
+  n = 20
+)
+
+# Benchmark for dprimarycensoreddist with Weibull distribution
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    x <- seq(0, 10, by = 1)
+  },
+  dprimarycensoreddist_weibull = {
+    dprimarycensoreddist(x, pweibull, shape = 1.5, scale = 2.0, D = 12)
+  },
+  n = 20
+)
+
+# Benchmark for dprimarycensoreddist with exponential growth
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    x <- seq(0, 10, by = 1)
+  },
+  dprimarycensoreddist_expgrowth = {
+    dprimarycensoreddist(
+      x, pweibull,
+      dprimary = dexpgrowth,
+      dprimary_args = list(r = 0.2),
+      shape = 1.5, scale = 2.0, D = 12
+    )
+  },
+  n = 20
+)
+
+# Benchmark for fitdistdoublecens with normal distribution
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    library(fitdistrplus)
+    set.seed(123)
+    n <- 1000
+    true_mean <- 5
+    true_sd <- 2
+    pwindow <- 1
+    swindow <- 1
+    D <- 10
+    samples <- rprimarycensoreddist(
+      n, rnorm,
+      mean = true_mean, sd = true_sd,
+      pwindow = pwindow, swindow = swindow, D = D
+    )
+    delay_data <- data.frame(
+      left = samples,
+      right = samples + swindow
+    )
+  },
+  fitdistdoublecens_normal = {
+    fitdistdoublecens(
+      delay_data,
+      distr = "norm",
+      start = list(mean = 0, sd = 1),
+      D = D,
+      pwindow = pwindow
+    )
+  },
+  n = 10
+)
+
+# Benchmark for fitdistdoublecens with exponential growth
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    library(fitdistrplus)
+    set.seed(456)
+    n <- 1000
+    true_shape <- 2
+    true_rate <- 0.5
+    pwindow <- 1
+    swindow <- 1
+    D <- 8
+    samples <- rprimarycensoreddist(
+      n, rgamma,
+      shape = true_shape, rate = true_rate,
+      pwindow = pwindow, swindow = swindow, D = D,
+    )
+    delay_data <- data.frame(
+      left = samples,
+      right = samples + swindow
+    )
+  },
+  fitdistdoublecens_gamma = {
+    fitdistdoublecens(
+      delay_data,
+      distr = "gamma",
+      start = list(shape = 1, rate = 1),
+      D = D,
+      pwindow = pwindow
+    )
+  },
+  n = 10
+)
+
+# Benchmark for fitting lognormal distribution
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    library(dplyr)
+    library(cmdstanr)
+
+    cmdstanr::set_cmdstan_path()
+    options(mc.cores = 2)
+
+    set.seed(123)
+    n1 <- 2000
+    true_meanlog1 <- 1.5
+    true_sdlog1 <- 0.5
+
+    simulated_delays1 <- rprimarycensoreddist(
+      n = n1,
+      rdist = rlnorm,
+      meanlog = true_meanlog1,
+      sdlog = true_sdlog1,
+      pwindow = 1,
+      D = 10
+    )
+
+    example_data1 <- data.frame(
+      delay = simulated_delays1,
+      delay_upper = simulated_delays1 + 1,
+      pwindow = 1,
+      relative_obs_time = 10
+    )
+
+    delay_counts1 <- example_data1 |>
+      summarise(
+        n = n(),
+        .by = c(pwindow, relative_obs_time, delay, delay_upper)
+      )
+
+    stan_data1 <- pcd_as_cmdstan_data(
+      delay_counts1,
+      dist_id = 1,
+      primary_dist_id = 1,
+      param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)),
+      primary_param_bounds = list(lower = numeric(0), upper = numeric(0)),
+      priors = list(location = c(0, 1), scale = c(1, 1)),
+      primary_priors = list(location = numeric(0), scale = numeric(0))
+    )
+
+    model <- suppressMessages(suppressWarnings(pcd_cmdstan_model()))
+  },
+  cmdstan_fit_lognormal = {
+    fit1 <- model$sample(
+      data = stan_data1,
+      seed = 123,
+      chains = 2,
+      parallel_chains = 2,
+      refresh = 0,
+      show_messages = FALSE,
+      iter_warmup = 500,
+      iter_sampling = 500
+    )
+  },
+  n = 10
+)
+
+# Benchmark for fitting gamma distribution
+touchstone::benchmark_run(
+  expr_before_benchmark = {
+    library(primarycensoreddist)
+    library(dplyr)
+    library(cmdstanr)
+
+    cmdstanr::set_cmdstan_path()
+    options(mc.cores = 2)
+
+    set.seed(456)
+    n2 <- 2000
+    true_shape2 <- 2
+    true_rate2 <- 0.5
+
+    simulated_delays2 <- rprimarycensoreddist(
+      n = n2,
+      rdist = rgamma,
+      shape = true_shape2,
+      rate = true_rate2,
+      pwindow = 2,
+      swindow = 2,
+      D = 8,
+      rprimary = rexpgrowth,
+      rprimary_args = list(r = 0.1)
+    )
+
+    example_data2 <- data.frame(
+      delay = simulated_delays2,
+      delay_upper = simulated_delays2 + 2,
+      pwindow = 2,
+      relative_obs_time = 8
+    )
+
+    delay_counts2 <- example_data2 |>
+      summarise(
+        n = n(),
+        .by = c(pwindow, relative_obs_time, delay, delay_upper)
+      )
+
+    stan_data2 <- pcd_as_cmdstan_data(
+      delay_counts2,
+      dist_id = 2,
+      primary_dist_id = 2,
+      param_bounds = list(lower = c(0, 0), upper = c(Inf, Inf)),
+      primary_param_bounds = list(lower = 0, upper = Inf),
+      priors = list(location = c(2, 1), scale = c(0.5, 0.5)),
+      primary_priors = list(location = 0.1, scale = 0.1)
+    )
+
+    model <- suppressMessages(suppressWarnings(pcd_cmdstan_model()))
+  },
+  cmdstan_fit_gamma = {
+    fit2 <- model$sample(
+      data = stan_data2,
+      seed = 456,
+      chains = 2,
+      parallel_chains = 2,
+      refresh = 0,
+      show_messages = FALSE,
+      iter_warmup = 500,
+      iter_sampling = 500
+    )
+  },
+  n = 10
+)
+
+# create artifacts used downstream in the GitHub Action.
+touchstone::benchmark_analyze()