From 2a27c144d1693f507da996475e1548fe44b7a4fc Mon Sep 17 00:00:00 2001
From: Ben Butler-Cole <ben@bridesmere.com>
Date: Tue, 24 Sep 2024 10:45:39 +0100
Subject: [PATCH 1/3] Clarify required permissions

---
 INSTALL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL.md b/INSTALL.md
index 38f183e..428ea3f 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -26,7 +26,7 @@ Code scanning alerts, Dependabot alerts, Issues, Metadata, Pull requests and Rep
 
 The `GITHUB_OS_TOKEN` is a fine-grained GitHub personal access token that is used for authenticating with the GitHub REST API.
 It is assigned to a single organisation and should have the following *read-only* permissions:
-* organisation permissions: codespaces
+* organisation permissions: organisation codespaces
 * *all repositories* owned by the organisation with the following permissions:
 Codespaces and Metadata
 

From 3eb3c4438fa62338dc960b9b518e157f0959f1e6 Mon Sep 17 00:00:00 2001
From: Ben Butler-Cole <ben@bridesmere.com>
Date: Tue, 24 Sep 2024 12:06:33 +0100
Subject: [PATCH 2/3] Remove unused log configuration

---
 metrics/logs.py | 67 -------------------------------------------------
 1 file changed, 67 deletions(-)
 delete mode 100644 metrics/logs.py

diff --git a/metrics/logs.py b/metrics/logs.py
deleted file mode 100644
index 5f54d28..0000000
--- a/metrics/logs.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import logging.config
-
-import structlog
-
-
-timestamper = structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S")
-pre_chain = [
-    # Add the log level and a timestamp to the event_dict if the log entry
-    # is not from structlog.
-    structlog.stdlib.add_log_level,
-    # Add extra attributes of LogRecord objects to the event dictionary
-    # so that values passed in the extra parameter of log methods pass
-    # through to log output.
-    structlog.stdlib.ExtraAdder(),
-    timestamper,
-]
-
-
-def setup_logging(debug=False):
-    logging.config.dictConfig(
-        {
-            "version": 1,
-            "disable_existing_loggers": False,
-            "formatters": {
-                "colored": {
-                    "()": structlog.stdlib.ProcessorFormatter,
-                    "processors": [
-                        structlog.stdlib.ProcessorFormatter.remove_processors_meta,
-                        structlog.dev.ConsoleRenderer(colors=True),
-                    ],
-                    "foreign_pre_chain": pre_chain,
-                },
-            },
-            "handlers": {
-                "console": {
-                    "level": "DEBUG",
-                    "class": "logging.StreamHandler",
-                    "formatter": "colored",
-                },
-            },
-            "loggers": {
-                "": {
-                    "handlers": ["console"],
-                    "level": "DEBUG" if debug else "INFO",
-                    "propagate": True,
-                },
-                "sqlalchemy": {
-                    "handlers": ["console"],
-                    "level": "WARNING",
-                    "propagate": False,
-                },
-            },
-        }
-    )
-    structlog.configure(
-        processors=[
-            structlog.stdlib.add_log_level,
-            structlog.stdlib.PositionalArgumentsFormatter(),
-            timestamper,
-            structlog.processors.StackInfoRenderer(),
-            structlog.processors.format_exc_info,
-            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
-        ],
-        logger_factory=structlog.stdlib.LoggerFactory(),
-        wrapper_class=structlog.stdlib.BoundLogger,
-        cache_logger_on_first_use=True,
-    )

From 91ff73821697745a66006f4e245b30a238254c4c Mon Sep 17 00:00:00 2001
From: Ben Butler-Cole <ben@bridesmere.com>
Date: Tue, 24 Sep 2024 12:41:07 +0100
Subject: [PATCH 3/3] Add optional caching of GitHub API requests

This seems to be the least invasive way of avoiding the overhead of
calling the GitHub API when developing the metrics. The most obvious
alternative would be persistently memoizing the functions in the `query`
module (e.g. with `cachier`), but that would require more code.
---
 .gitignore               |  1 +
 DEVELOPERS.md            | 19 ++++++++++++++++++-
 justfile                 |  4 ++++
 metrics/github/client.py |  9 +++++++++
 requirements.prod.in     |  1 +
 requirements.prod.txt    | 29 +++++++++++++++++++++++++++++
 6 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 556fb94..7902c14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 __pycache__/
 github.db
 htmlcov
+github-cache.sqlite
diff --git a/DEVELOPERS.md b/DEVELOPERS.md
index e1bdda5..6c12225 100644
--- a/DEVELOPERS.md
+++ b/DEVELOPERS.md
@@ -71,7 +71,7 @@ e.g `just metrics prs` to run metrics/tasks/prs.py
 
 All tasks are defined in `metrics/tasks` and must have a `main()` function that takes no arguments.
 
-### Fast debug mode
+### Speeding up development
 
 You can set a flag to trigger a fast mode which only retrieves and handful of PRs
 but allows the main code paths to be tested quickly.
@@ -80,6 +80,23 @@ but allows the main code paths to be tested quickly.
 DEBUG_FAST=t just metrics prs
 ```
 
+Alternatively, you can turn on caching of GitHub API requests.
+This is particularly useful when iterating on metric definitions
+without changing the data that we retrieve from the API.
+
+```
+DEBUG_CACHE=t just metrics prs
+```
+
+Note that the cache has no expiry time
+(although it will be bypassed on subsequent runs if `DEBUG_CACHE` isn't defined).
+You can clear the cache explicitly.
+
+```
+just clean-cache
+```
+
+
 ## Tests
 Run the tests with:
 ```
diff --git a/justfile b/justfile
index 703f986..16275e6 100644
--- a/justfile
+++ b/justfile
@@ -168,3 +168,7 @@ docker-build env="dev": _env
 docker-run env="dev" *args="": _env
     {{ just_executable() }} docker-build {{ env }}
     docker compose run --rm metrics-{{ env }} {{ args }}
+
+# See DEVELOPERS.md
+clean-cache:
+    rm -f github-cache.sqlite
diff --git a/metrics/github/client.py b/metrics/github/client.py
index a0d1855..a702f37 100644
--- a/metrics/github/client.py
+++ b/metrics/github/client.py
@@ -1,13 +1,22 @@
 import json
+import os
 import textwrap
 
 import requests
 import requests.utils
+import requests_cache
 import structlog
 
 
 log = structlog.get_logger()
 
+# See DEVELOPERS.md
+if "DEBUG_CACHE" in os.environ:
+    requests_cache.install_cache(
+        "github-cache",
+        # Turn on caching for POST requests because that's what GraphQL uses
+        allowable_methods=("GET", "HEAD", "POST"),
+    )
 
 session = requests.Session()
 
diff --git a/requirements.prod.in b/requirements.prod.in
index 230eded..68266c9 100644
--- a/requirements.prod.in
+++ b/requirements.prod.in
@@ -1,5 +1,6 @@
   greenlet
   requests
+  requests-cache
   slack-bolt
   sqlalchemy[postgresql_psycopgbinary]
   structlog
diff --git a/requirements.prod.txt b/requirements.prod.txt
index d672359..a1fac02 100644
--- a/requirements.prod.txt
+++ b/requirements.prod.txt
@@ -4,6 +4,16 @@
 #
 #    pip-compile --allow-unsafe --generate-hashes --output-file=requirements.prod.txt requirements.prod.in
 #
+attrs==24.2.0 \
+    --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \
+    --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2
+    # via
+    #   cattrs
+    #   requests-cache
+cattrs==24.1.2 \
+    --hash=sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0 \
+    --hash=sha256:8028cfe1ff5382df59dd36474a86e02d817b06eaf8af84555441bac915d2ef85
+    # via requests-cache
 certifi==2024.8.30 \
     --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \
     --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9
@@ -183,6 +193,10 @@ idna==3.10 \
     --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \
     --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
     # via requests
+platformdirs==4.3.6 \
+    --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \
+    --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb
+    # via requests-cache
 psycopg[binary]==3.2.2 \
     --hash=sha256:8bad2e497ce22d556dac1464738cb948f8d6bab450d965cf1d8a8effd52412e0 \
     --hash=sha256:babf565d459d8f72fb65da5e211dd0b58a52c51e4e1fa9cadecff42d6b7619b2
@@ -256,11 +270,21 @@ psycopg-binary==3.2.2 \
 requests==2.32.3 \
     --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \
     --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
+    # via
+    #   -r requirements.prod.in
+    #   requests-cache
+requests-cache==1.2.1 \
+    --hash=sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603 \
+    --hash=sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1
     # via -r requirements.prod.in
 sentry-sdk==2.14.0 \
     --hash=sha256:1e0e2eaf6dad918c7d1e0edac868a7bf20017b177f242cefe2a6bcd47955961d \
     --hash=sha256:b8bc3dc51d06590df1291b7519b85c75e2ced4f28d9ea655b6d54033503b5bf4
     # via -r requirements.prod.in
+six==1.16.0 \
+    --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
+    --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
+    # via url-normalize
 slack-bolt==1.20.1 \
     --hash=sha256:4657e592339797b9b804547a21e6b35dd8e2cd1eab676bfb23960660aae049fd \
     --hash=sha256:8fa26e72b0e55c18c1d34a73558e7fe2150bdc7c947de780b938fdb1d7e854fe
@@ -330,9 +354,14 @@ typing-extensions==4.12.2 \
     # via
     #   psycopg
     #   sqlalchemy
+url-normalize==1.4.3 \
+    --hash=sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2 \
+    --hash=sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed
+    # via requests-cache
 urllib3==2.2.3 \
     --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \
     --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9
     # via
     #   requests
+    #   requests-cache
     #   sentry-sdk