Skip to content

Commit

Permalink
Implement deleted flag for codespaces
Browse files Browse the repository at this point in the history
If we are to ascertain which *currently active* codespaces are at risk of
deletion (due to 30d lifespan), we need to know the status of current and
past codespaces.

When deleted, a codespace's record is no longer returned by the GitHub API
so we can assume that all codespaces that are not listed in the API response
are no longer in existence.
  • Loading branch information
Jongmassey committed Nov 4, 2024
1 parent 3b70937 commit 1367e26
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 1 deletion.
2 changes: 2 additions & 0 deletions metrics/github/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class Codespace:
last_used_at: datetime.datetime
has_uncommitted_changes: bool
has_unpushed_changes: bool
deleted: bool

@classmethod
def from_dict(cls, data, org):
Expand All @@ -128,6 +129,7 @@ def from_dict(cls, data, org):
last_used_at=data["last_used_at"],
has_uncommitted_changes=data["git_status"]["has_uncommitted_changes"],
has_unpushed_changes=data["git_status"]["has_unpushed_changes"],
deleted=False,
)


Expand Down
1 change: 1 addition & 0 deletions metrics/github/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def convert_codespaces_to_dicts(codespaces):
"last_used_at": c.last_used_at,
"has_uncommitted_changes": c.has_uncommitted_changes,
"has_unpushed_changes": c.has_unpushed_changes,
"deleted": c.deleted,
}
for c in codespaces
]
6 changes: 6 additions & 0 deletions metrics/tasks/codespaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ def main():
codespaces = github.codespaces(org="opensafely")
log.info(f"Got {len(codespaces)} codespaces")

log.info("Flagging old codespaces as deleted")
db.flag_deleted(tables.GitHubCodespaces)
log.info("Deletes flagged")

# Incoming data has deleted=False so previously flagged rows will be overwritten
# if codespace still exists.
log.info("Writing data")
db.upsert(tables.GitHubCodespaces, convert_codespaces_to_dicts(codespaces))
log.info("Written data")
Expand Down
13 changes: 12 additions & 1 deletion metrics/timescaledb/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

import structlog
from sqlalchemy import MetaData, create_engine, inspect, schema, text
from sqlalchemy import Boolean, MetaData, create_engine, inspect, schema, text
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.engine import make_url

Expand Down Expand Up @@ -65,6 +65,17 @@ def upsert(table, rows):
log.info("Inserted %s rows", len(values), table=table.name)


def flag_deleted(table):
    """Set ``deleted=True`` on every row currently stored in *table*.

    The update carries no WHERE clause, so all existing rows are flagged.
    The table is created first (via ``_ensure_table``) if necessary.

    Raises:
        AttributeError: if *table* has no ``deleted`` column, or that column
            is not of a Boolean type.
    """
    columns = table.columns
    has_boolean_flag = "deleted" in columns and isinstance(
        columns["deleted"].type, Boolean
    )
    if not has_boolean_flag:
        raise AttributeError("Table must have deleted column of boolean type")

    # Building the statement needs no connection, so do it before opening
    # the transaction.
    mark_all_deleted = table.update().values(deleted=True)
    with _get_engine().begin() as connection:
        _ensure_table(connection, table)
        connection.execute(mark_all_deleted)


def _batch_size(table):
    """Return how many rows of *table* fit within one statement's parameters.

    Each row contributes one parameter per column, so the count is the
    parameter limit integer-divided by the number of columns.
    """
    postgres_param_limit = 65535  # limit for postgresql
    column_count = len(table.columns)
    return postgres_param_limit // column_count
Expand Down
1 change: 1 addition & 0 deletions metrics/timescaledb/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Column("last_used_at", TIMESTAMP(timezone=True)),
Column("has_uncommitted_changes", Boolean),
Column("has_unpushed_changes", Boolean),
Column("deleted", Boolean),
)

GitHubRepos = Table(
Expand Down
22 changes: 22 additions & 0 deletions tests/metrics/timescaledb/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest
from sqlalchemy import (
TIMESTAMP,
Boolean,
Column,
Integer,
MetaData,
Expand Down Expand Up @@ -216,6 +217,27 @@ def test_write(engine, table):
assert len(rows) == 3


def test_flag_deleted(engine, table):
    """flag_deleted marks every previously written row as deleted."""
    table.append_column(Column("deleted", Boolean))
    with engine.begin() as connection:
        db._ensure_table(connection, table)

    # insert initial rows, all explicitly not deleted
    rows = [{"value": f"write{i}", "deleted": False} for i in range(1, 4)]
    db.write(table, rows)

    db.flag_deleted(table)

    rows = get_rows(engine, table)
    # Check the row count first: all() is vacuously true for an empty result,
    # so without this the test would pass even if nothing had been written.
    assert len(rows) == 3
    # NOTE(review): r[1] is presumably the "deleted" column appended above;
    # this depends on the column layout of the `table` fixture — confirm.
    assert all(r[1] for r in rows)


def test_upsert(engine, table):
# add a non-PK column to the table
table.append_column(Column("value2", Text))
Expand Down

0 comments on commit 1367e26

Please sign in to comment.