From 1367e262fb266f3e132232e877be614783b75151 Mon Sep 17 00:00:00 2001 From: Jon Massey Date: Mon, 4 Nov 2024 11:21:58 +0000 Subject: [PATCH] Implement deleted flag for codespaces If we are to ascertain which *currently active* codespaces are at risk of deletion (due to 30d lifespan), we need to know the status of current and past codespaces. When deleted, a codespace's record is no longer returned by the GitHub API so we can assume that all codespaces that are not listed in the API response are no longer in existence. --- metrics/github/github.py | 2 ++ metrics/github/metrics.py | 1 + metrics/tasks/codespaces.py | 6 ++++++ metrics/timescaledb/db.py | 13 ++++++++++++- metrics/timescaledb/tables.py | 1 + tests/metrics/timescaledb/test_db.py | 22 ++++++++++++++++++++++ 6 files changed, 44 insertions(+), 1 deletion(-) diff --git a/metrics/github/github.py b/metrics/github/github.py index e7aacde..b7bebae 100644 --- a/metrics/github/github.py +++ b/metrics/github/github.py @@ -117,6 +117,7 @@ class Codespace: last_used_at: datetime.datetime has_uncommitted_changes: bool has_unpushed_changes: bool + deleted: bool @classmethod def from_dict(cls, data, org): @@ -128,6 +129,7 @@ def from_dict(cls, data, org): last_used_at=data["last_used_at"], has_uncommitted_changes=data["git_status"]["has_uncommitted_changes"], has_unpushed_changes=data["git_status"]["has_unpushed_changes"], + deleted=False, ) diff --git a/metrics/github/metrics.py b/metrics/github/metrics.py index f73c3ee..02650b5 100644 --- a/metrics/github/metrics.py +++ b/metrics/github/metrics.py @@ -87,6 +87,7 @@ def convert_codespaces_to_dicts(codespaces): "last_used_at": c.last_used_at, "has_uncommitted_changes": c.has_uncommitted_changes, "has_unpushed_changes": c.has_unpushed_changes, + "deleted": c.deleted, } for c in codespaces ] diff --git a/metrics/tasks/codespaces.py b/metrics/tasks/codespaces.py index 38159fb..7e53daf 100644 --- a/metrics/tasks/codespaces.py +++ b/metrics/tasks/codespaces.py @@ -15,6 +15,12 
@@ def main(): codespaces = github.codespaces(org="opensafely") log.info(f"Got {len(codespaces)} codespaces") + log.info("Flagging old codespaces as deleted") + db.flag_deleted(tables.GitHubCodespaces) + log.info("Deletes flagged") + + # Incoming data has deleted=False so previously flagged rows will be overwritten + # if codespace still exists. log.info("Writing data") db.upsert(tables.GitHubCodespaces, convert_codespaces_to_dicts(codespaces)) log.info("Written data") diff --git a/metrics/timescaledb/db.py b/metrics/timescaledb/db.py index 213019f..119d47a 100644 --- a/metrics/timescaledb/db.py +++ b/metrics/timescaledb/db.py @@ -2,7 +2,7 @@ import os import structlog -from sqlalchemy import MetaData, create_engine, inspect, schema, text +from sqlalchemy import Boolean, MetaData, create_engine, inspect, schema, text from sqlalchemy.dialects.postgresql import insert from sqlalchemy.engine import make_url @@ -65,6 +65,17 @@ def upsert(table, rows): log.info("Inserted %s rows", len(values), table=table.name) +def flag_deleted(table): + if "deleted" not in table.columns or not isinstance( + table.columns["deleted"].type, Boolean + ): + raise AttributeError("Table must have deleted column of boolean type") + with _get_engine().begin() as connection: + _ensure_table(connection, table) + update = table.update().values(deleted=True) + connection.execute(update) + + def _batch_size(table): max_params = 65535 # limit for postgresql return max_params // len(table.columns) diff --git a/metrics/timescaledb/tables.py b/metrics/timescaledb/tables.py index 94497dd..65289d8 100644 --- a/metrics/timescaledb/tables.py +++ b/metrics/timescaledb/tables.py @@ -14,6 +14,7 @@ Column("last_used_at", TIMESTAMP(timezone=True)), Column("has_uncommitted_changes", Boolean), Column("has_unpushed_changes", Boolean), + Column("deleted", Boolean), ) GitHubRepos = Table( diff --git a/tests/metrics/timescaledb/test_db.py b/tests/metrics/timescaledb/test_db.py index 2b13522..02a8eb5 100644 --- 
a/tests/metrics/timescaledb/test_db.py +++ b/tests/metrics/timescaledb/test_db.py @@ -3,6 +3,7 @@ import pytest from sqlalchemy import ( TIMESTAMP, + Boolean, Column, Integer, MetaData, @@ -216,6 +217,27 @@ def test_write(engine, table): assert len(rows) == 3 +def test_flag_deleted(engine, table): + table.append_column(Column("deleted", Boolean)) + # insert initial rows + with engine.begin() as connection: + db._ensure_table(connection, table) + + with engine.begin() as connection: + rows = [ + { + "value": "write" + str(i), + "deleted": False, + } + for i in range(1, 4) + ] + db.write(table, rows) + db.flag_deleted(table) + + rows = get_rows(engine, table) + assert all(r[1] for r in rows) + + def test_upsert(engine, table): # add a non-PK column to the table table.append_column(Column("value2", Text))