From 2a27c144d1693f507da996475e1548fe44b7a4fc Mon Sep 17 00:00:00 2001 From: Ben Butler-Cole Date: Tue, 24 Sep 2024 10:45:39 +0100 Subject: [PATCH 1/3] Clarify required permissions --- INSTALL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index 38f183e..428ea3f 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -26,7 +26,7 @@ Code scanning alerts, Dependabot alerts, Issues, Metadata, Pull requests and Rep The `GITHUB_OS_TOKEN` is a fine-grained GitHub personal access token that is used for authenticating with the GitHub REST API. It is assigned to a single organisation and should have the following *read-only* permissions: -* organisation permissions: codespaces +* organisation permissions: organisation codespaces * *all repositories* owned by the organisation with the following permissions: Codespaces and Metadata From 3eb3c4438fa62338dc960b9b518e157f0959f1e6 Mon Sep 17 00:00:00 2001 From: Ben Butler-Cole Date: Tue, 24 Sep 2024 12:06:33 +0100 Subject: [PATCH 2/3] Remove unused log configuration --- metrics/logs.py | 67 ------------------------------------------------- 1 file changed, 67 deletions(-) delete mode 100644 metrics/logs.py diff --git a/metrics/logs.py b/metrics/logs.py deleted file mode 100644 index 5f54d28..0000000 --- a/metrics/logs.py +++ /dev/null @@ -1,67 +0,0 @@ -import logging.config - -import structlog - - -timestamper = structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S") -pre_chain = [ - # Add the log level and a timestamp to the event_dict if the log entry - # is not from structlog. - structlog.stdlib.add_log_level, - # Add extra attributes of LogRecord objects to the event dictionary - # so that values passed in the extra parameter of log methods pass - # through to log output. 
- structlog.stdlib.ExtraAdder(), - timestamper, -] - - -def setup_logging(debug=False): - logging.config.dictConfig( - { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "colored": { - "()": structlog.stdlib.ProcessorFormatter, - "processors": [ - structlog.stdlib.ProcessorFormatter.remove_processors_meta, - structlog.dev.ConsoleRenderer(colors=True), - ], - "foreign_pre_chain": pre_chain, - }, - }, - "handlers": { - "console": { - "level": "DEBUG", - "class": "logging.StreamHandler", - "formatter": "colored", - }, - }, - "loggers": { - "": { - "handlers": ["console"], - "level": "DEBUG" if debug else "INFO", - "propagate": True, - }, - "sqlalchemy": { - "handlers": ["console"], - "level": "WARNING", - "propagate": False, - }, - }, - } - ) - structlog.configure( - processors=[ - structlog.stdlib.add_log_level, - structlog.stdlib.PositionalArgumentsFormatter(), - timestamper, - structlog.processors.StackInfoRenderer(), - structlog.processors.format_exc_info, - structlog.stdlib.ProcessorFormatter.wrap_for_formatter, - ], - logger_factory=structlog.stdlib.LoggerFactory(), - wrapper_class=structlog.stdlib.BoundLogger, - cache_logger_on_first_use=True, - ) From 91ff73821697745a66006f4e245b30a238254c4c Mon Sep 17 00:00:00 2001 From: Ben Butler-Cole Date: Tue, 24 Sep 2024 12:41:07 +0100 Subject: [PATCH 3/3] Add optional caching of GitHub API requests This seems to be the least invasive way of avoiding the overhead of calling the GitHub API when developing the metrics. The most obvious alternative would be persistently-memoizing the functions in the `query` module (e.g. with `cachier`), but that would require more code. 
--- .gitignore | 1 + DEVELOPERS.md | 19 ++++++++++++++++++- justfile | 4 ++++ metrics/github/client.py | 9 +++++++++ requirements.prod.in | 1 + requirements.prod.txt | 29 +++++++++++++++++++++++++++++ 6 files changed, 62 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 556fb94..7902c14 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__/ github.db htmlcov +github-cache.sqlite diff --git a/DEVELOPERS.md b/DEVELOPERS.md index e1bdda5..6c12225 100644 --- a/DEVELOPERS.md +++ b/DEVELOPERS.md @@ -71,7 +71,7 @@ e.g `just metrics prs` to run metrics/tasks/prs.py All tasks are defined in `metrics/tasks` and must have a `main()` function that takes no arguments. -### Fast debug mode +### Speeding up development You can set a flag to trigger a fast mode which only retrieves and handful of PRs but allows the main code paths to be tested quickly. @@ -80,6 +80,23 @@ but allows the main code paths to be tested quickly. DEBUG_FAST=t just metrics prs ``` +Alternatively you can turn on caching of GitHub API requests. +This is particularly useful when iterating on metric definitions +without changing the data that we retrieve from the API. + +``` +DEBUG_CACHE=t just metrics prs +``` + +NB that the cache has no expiry time +(although it will be bypassed on subsequent runs if `DEBUG_CACHE` isn't defined). +You can clear the cache explicitly.
+ +``` +just clean-cache +``` + + ## Tests Run the tests with: ``` diff --git a/justfile b/justfile index 703f986..16275e6 100644 --- a/justfile +++ b/justfile @@ -168,3 +168,7 @@ docker-build env="dev": _env docker-run env="dev" *args="": _env {{ just_executable() }} docker-build {{ env }} docker compose run --rm metrics-{{ env }} {{ args }} + +# See DEVELOPERS.md +clean-cache: + rm -f github-cache.sqlite diff --git a/metrics/github/client.py b/metrics/github/client.py index a0d1855..a702f37 100644 --- a/metrics/github/client.py +++ b/metrics/github/client.py @@ -1,13 +1,22 @@ import json +import os import textwrap import requests import requests.utils +import requests_cache import structlog log = structlog.get_logger() +# See DEVELOPERS.md +if "DEBUG_CACHE" in os.environ: + requests_cache.install_cache( + "github-cache", + # Turn on caching for POST requests because that's what GraphQL uses + allowable_methods=("GET", "HEAD", "POST"), + ) session = requests.Session() diff --git a/requirements.prod.in b/requirements.prod.in index 230eded..68266c9 100644 --- a/requirements.prod.in +++ b/requirements.prod.in @@ -1,5 +1,6 @@ greenlet requests + requests-cache slack-bolt sqlalchemy[postgresql_psycopgbinary] structlog diff --git a/requirements.prod.txt b/requirements.prod.txt index d672359..a1fac02 100644 --- a/requirements.prod.txt +++ b/requirements.prod.txt @@ -4,6 +4,16 @@ # # pip-compile --allow-unsafe --generate-hashes --output-file=requirements.prod.txt requirements.prod.in # +attrs==24.2.0 \ + --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ + --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 + # via + # cattrs + # requests-cache +cattrs==24.1.2 \ + --hash=sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0 \ + --hash=sha256:8028cfe1ff5382df59dd36474a86e02d817b06eaf8af84555441bac915d2ef85 + # via requests-cache certifi==2024.8.30 \ 
--hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 @@ -183,6 +193,10 @@ idna==3.10 \ --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 # via requests +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb + # via requests-cache psycopg[binary]==3.2.2 \ --hash=sha256:8bad2e497ce22d556dac1464738cb948f8d6bab450d965cf1d8a8effd52412e0 \ --hash=sha256:babf565d459d8f72fb65da5e211dd0b58a52c51e4e1fa9cadecff42d6b7619b2 @@ -256,11 +270,21 @@ psycopg-binary==3.2.2 \ requests==2.32.3 \ --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # -r requirements.prod.in + # requests-cache +requests-cache==1.2.1 \ + --hash=sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603 \ + --hash=sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1 # via -r requirements.prod.in sentry-sdk==2.14.0 \ --hash=sha256:1e0e2eaf6dad918c7d1e0edac868a7bf20017b177f242cefe2a6bcd47955961d \ --hash=sha256:b8bc3dc51d06590df1291b7519b85c75e2ced4f28d9ea655b6d54033503b5bf4 # via -r requirements.prod.in +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via url-normalize slack-bolt==1.20.1 \ --hash=sha256:4657e592339797b9b804547a21e6b35dd8e2cd1eab676bfb23960660aae049fd \ --hash=sha256:8fa26e72b0e55c18c1d34a73558e7fe2150bdc7c947de780b938fdb1d7e854fe @@ -330,9 +354,14 @@ typing-extensions==4.12.2 \ # via # psycopg # sqlalchemy +url-normalize==1.4.3 \ + 
--hash=sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2 \ + --hash=sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed + # via requests-cache urllib3==2.2.3 \ --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 # via # requests + # requests-cache # sentry-sdk