From 80c7d86c092c43603cc1215cf2f26d995d8e4448 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 15:16:40 +0000 Subject: [PATCH 1/9] Add sqlalchemy as a dependency --- pyproject.toml | 2 +- requirements.prod.txt | 59 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 833155f0..06216600 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,8 @@ dependencies = [ "click", "github-to-sqlite", "requests", - "psycopg[binary]", "slack-bolt", + "sqlalchemy[postgresql_psycopgbinary]", "structlog", ] dynamic = ["version"] diff --git a/requirements.prod.txt b/requirements.prod.txt index 38d6396e..fb92db37 100644 --- a/requirements.prod.txt +++ b/requirements.prod.txt @@ -126,7 +126,9 @@ pluggy==1.3.0 \ psycopg[binary]==3.1.12 \ --hash=sha256:8ec5230d6a7eb654b4fb3cf2d3eda8871d68f24807b934790504467f1deee9f8 \ --hash=sha256:cec7ad2bc6a8510e56c45746c631cf9394148bdc8a9a11fd8cf8554ce129ae78 - # via metrics (pyproject.toml) + # via + # psycopg + # sqlalchemy psycopg-binary==3.1.12 \ --hash=sha256:000838cb5ab7851116b462e58893a96b0f1e35864135a6283f3242a730ec45d3 \ --hash=sha256:03a851123d0155e1d6ca5b6cccf624e2fc71c8f7eae76f5100196e0fca047d30 \ @@ -268,6 +270,57 @@ slack-sdk==3.23.0 \ --hash=sha256:2a8513505cced20ceee22b5b49c11d9545caa6234b56bf0ad47133ea5b357d10 \ --hash=sha256:9d6ebc4ff74e7983e1b27dbdb0f2bb6fc3c2a2451694686eaa2be23bbb085a73 # via slack-bolt +sqlalchemy[postgresql-psycopgbinary,postgresql_psycopgbinary]==2.0.23 \ + --hash=sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3 \ + --hash=sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884 \ + --hash=sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74 \ + --hash=sha256:14aebfe28b99f24f8a4c1346c48bc3d63705b1f919a24c27471136d2f219f02d \ + --hash=sha256:1e018aba8363adb0599e745af245306cb8c46b9ad0a6fc0a86745b6ff7d940fc \ + --hash=sha256:227135ef1e48165f37590b8bfc44ed7ff4c074bf04dc8d6f8e7f1c14a94aa6ca \ + --hash=sha256:31952bbc527d633b9479f5f81e8b9dfada00b91d6baba021a869095f1a97006d \ + --hash=sha256:3e983fa42164577d073778d06d2cc5d020322425a509a08119bdcee70ad856bf \ + --hash=sha256:42d0b0290a8fb0165ea2c2781ae66e95cca6e27a2fbe1016ff8db3112ac1e846 \ + --hash=sha256:42ede90148b73fe4ab4a089f3126b2cfae8cfefc955c8174d697bb46210c8306 \ + --hash=sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221 \ + --hash=sha256:4af79c06825e2836de21439cb2a6ce22b2ca129bad74f359bddd173f39582bf5 \ + --hash=sha256:5f94aeb99f43729960638e7468d4688f6efccb837a858b34574e01143cf11f89 \ + --hash=sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55 \ + --hash=sha256:62d9e964870ea5ade4bc870ac4004c456efe75fb50404c03c5fd61f8bc669a72 \ + --hash=sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea \ + --hash=sha256:63bfc3acc970776036f6d1d0e65faa7473be9f3135d37a463c5eba5efcdb24c8 \ + --hash=sha256:6463aa765cf02b9247e38b35853923edbf2f6fd1963df88706bc1d02410a5577 \ + --hash=sha256:64ac935a90bc479fee77f9463f298943b0e60005fe5de2aa654d9cdef46c54df \ + --hash=sha256:683ef58ca8eea4747737a1c35c11372ffeb84578d3aab8f3e10b1d13d66f2bc4 \ + --hash=sha256:75eefe09e98043cff2fb8af9796e20747ae870c903dc61d41b0c2e55128f958d \ + --hash=sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34 \ + --hash=sha256:7c424983ab447dab126c39d3ce3be5bee95700783204a72549c3dceffe0fc8f4 \ + --hash=sha256:7e0dc9031baa46ad0dd5a269cb7a92a73284d1309228be1d5935dac8fb3cae24 \ + --hash=sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6 \ + --hash=sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965 \ + --hash=sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35 \ + --hash=sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b \ + --hash=sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab \ + --hash=sha256:9ca922f305d67605668e93991aaf2c12239c78207bca3b891cd51a4515c72e22 \ + --hash=sha256:a86cb7063e2c9fb8e774f77fbf8475516d270a3e989da55fa05d08089d77f8c4 \ + --hash=sha256:aeb397de65a0a62f14c257f36a726945a7f7bb60253462e8602d9b97b5cbe204 \ + --hash=sha256:b41f5d65b54cdf4934ecede2f41b9c60c9f785620416e8e6c48349ab18643855 \ + --hash=sha256:bd45a5b6c68357578263d74daab6ff9439517f87da63442d244f9f23df56138d \ + --hash=sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab \ + --hash=sha256:c1bda93cbbe4aa2aa0aa8655c5aeda505cd219ff3e8da91d1d329e143e4aff69 \ + --hash=sha256:c4722f3bc3c1c2fcc3702dbe0016ba31148dd6efcd2a2fd33c1b4897c6a19693 \ + --hash=sha256:c80c38bd2ea35b97cbf7c21aeb129dcbebbf344ee01a7141016ab7b851464f8e \ + --hash=sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8 \ + --hash=sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0 \ + --hash=sha256:d0f7fb0c7527c41fa6fcae2be537ac137f636a41b4c5a4c58914541e2f436b45 \ + --hash=sha256:d4041ad05b35f1f4da481f6b811b4af2f29e83af253bf37c3c4582b2c68934ab \ + --hash=sha256:d5578e6863eeb998980c212a39106ea139bdc0b3f73291b96e27c929c90cd8e1 \ + --hash=sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d \ + --hash=sha256:e599a51acf3cc4d31d1a0cf248d8f8d863b6386d2b6782c5074427ebb7803bda \ + --hash=sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b \ + --hash=sha256:f48ed89dd11c3c586f45e9eec1e437b355b3b6f6884ea4a4c3111a3358fd0c18 \ + --hash=sha256:f508ba8f89e0a5ecdfd3761f82dda2a3d7b678a626967608f4273e0dba8f07ac \ + --hash=sha256:fd54601ef9cc455a0c61e5245f690c8a3ad67ddb03d3b91c361d076def0b4c60 + # via metrics (pyproject.toml) sqlite-fts4==1.0.3 \ --hash=sha256:0359edd8dea6fd73c848989e1e2b1f31a50fe5f9d7272299ff0e8dbaa62d035f \ --hash=sha256:78b05eeaf6680e9dbed8986bde011e9c086a06cb0c931b3cf7da94c214e8930c @@ -287,7 +340,9 @@ tabulate==0.9.0 \ typing-extensions==4.8.0 \ --hash=sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0 \ --hash=sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef - # via psycopg + # via + # psycopg + # sqlalchemy urllib3==2.0.7 \ --hash=sha256:c97dfde1f7bd43a71c8d2a58e369e9b2bf692d1334ea9f9cae55add7d0dd0f84 \ --hash=sha256:fdb6d215c776278489906c2f8916e6e7d4f5a9b602ccbcfdf7f016fc8da0596e From fca54b9bab9116c29403844165adad5265ff4ae2 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 15:16:55 +0000 Subject: [PATCH 2/9] Configure sqlalchemy logs to only show at the warning level We don't want these on by default. --- metrics/logs.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/metrics/logs.py b/metrics/logs.py index 63e9a9f6..5f54d281 100644 --- a/metrics/logs.py +++ b/metrics/logs.py @@ -44,6 +44,11 @@ def setup_logging(debug=False): "level": "DEBUG" if debug else "INFO", "propagate": True, }, + "sqlalchemy": { + "handlers": ["console"], + "level": "WARNING", + "propagate": False, + }, }, } ) From 21d32eda39756b3ed0d33b0d531577e7e7eab1cf Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 15:21:10 +0000 Subject: [PATCH 3/9] Update the timescale URL's protocol for sqlalchemy This is the "correct" protocol for use with postgres. Every other piece of software appears to support the "deprecated" version, but not sqlalchemy. --- dotenv-sample | 2 +- metrics/timescaledb/writer.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dotenv-sample b/dotenv-sample index 098d9e6e..fd9ae6a1 100644 --- a/dotenv-sample +++ b/dotenv-sample @@ -1,5 +1,5 @@ # The DSN for access the timescaledb database -TIMESCALEDB_URL=postgres://user:pass@localhost:5433/metrics +TIMESCALEDB_URL=postgresql://user:pass@localhost:5433/metrics # API token for pulling data from Github GITHUB_TOKEN= diff --git a/metrics/timescaledb/writer.py b/metrics/timescaledb/writer.py index a05c7b39..4d3ba09a 100644 --- a/metrics/timescaledb/writer.py +++ b/metrics/timescaledb/writer.py @@ -9,7 +9,11 @@ log = structlog.get_logger() -TIMESCALEDB_URL = os.environ["TIMESCALEDB_URL"] +# Note: psycopg2 is still the default postgres dialect for sqlalchemy so we +# inject +psycopg to enable using v3 +TIMESCALEDB_URL = os.environ["TIMESCALEDB_URL"].replace( + "postgresql", "postgresql+psycopg" +) def ensure_table(name): From 7321a25c0665264fcfc107d9c03ceeca4199c579 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 15:32:49 +0000 Subject: [PATCH 4/9] Use SQLAlchemy to write data to the database There is a slight semantic change here. TimescaleDBWriter caches all the statements as we process the data, then executes them on exit. This hugely improves backfill performance and doesn't change day-to-day usage performance. --- metrics/github/backfill.py | 17 ++++---- metrics/github/cli.py | 5 ++- metrics/github/prs.py | 3 +- metrics/slack/cli.py | 5 ++- metrics/timescaledb/tables.py | 42 ++++++++++-------- metrics/timescaledb/writer.py | 82 ++++++++++++++++------------------- 6 files changed, 77 insertions(+), 77 deletions(-) diff --git a/metrics/github/backfill.py b/metrics/github/backfill.py index 178de2ae..c7c16757 100644 --- a/metrics/github/backfill.py +++ b/metrics/github/backfill.py @@ -6,10 +6,11 @@ import click import structlog -from metrics.github.prs import process_prs -from metrics.logs import setup_logging -from metrics.timescaledb import TimescaleDBWriter -from metrics.tools.dates import date_from_iso, datetime_from_iso, iter_days +from ..logs import setup_logging +from ..timescaledb import TimescaleDBWriter +from ..timescaledb.tables import GitHubPullRequests +from ..tools.dates import date_from_iso, datetime_from_iso, iter_days +from .prs import process_prs setup_logging() @@ -79,8 +80,8 @@ def pr_queue(prs, org, start, days_threshold=None): key = f"queue{suffix}" log.info("%s | %s | %s | Processing %s PRs", key, day, org, len(prs_on_day)) - with TimescaleDBWriter("github_pull_requests", f"queue{suffix}") as writer: - process_prs(writer, prs_on_day, day) + with TimescaleDBWriter(GitHubPullRequests) as writer: + process_prs(writer, prs_on_day, day, f"queue{suffix}") def pr_throughput(prs, org, start): @@ -116,8 +117,8 @@ def next_weekday(d, weekday): key = "throughput" log.info("%s | %s | %s | Processing %s PRs", key, day, org, len(prs_in_range)) - with TimescaleDBWriter("github_pull_requests", "throughput") as writer: - process_prs(writer, prs_in_range, day) + with TimescaleDBWriter(GitHubPullRequests) as writer: + process_prs(writer, prs_in_range, day, name="throughput") @click.command() diff --git a/metrics/github/cli.py b/metrics/github/cli.py index a18c48d3..6c6122ad 100644 --- a/metrics/github/cli.py +++ b/metrics/github/cli.py @@ -4,6 +4,7 @@ import structlog from ..timescaledb import TimescaleDBWriter +from ..timescaledb.tables import GitHubPullRequests from . import api from .backfill import backfill from .prs import process_prs @@ -59,8 +60,8 @@ def pr_throughput(ctx, org, date, days): prs = api.prs_opened_in_the_last_N_days(org, start, end) log.info("%s | %s | Processing %s PRs", date, org, len(prs)) - with TimescaleDBWriter("github_pull_requests", "throughput") as writer: - process_prs(writer, prs, date) + with TimescaleDBWriter(GitHubPullRequests) as writer: + process_prs(writer, prs, date, name="throughput") github.add_command(backfill) diff --git a/metrics/github/prs.py b/metrics/github/prs.py index 421ddc30..a42e75cd 100644 --- a/metrics/github/prs.py +++ b/metrics/github/prs.py @@ -1,4 +1,4 @@ -def process_prs(writer, prs, date): +def process_prs(writer, prs, date, name=""): """ Given a list of PRs, break them down in series for writing @@ -23,6 +23,7 @@ def process_prs(writer, prs, date): writer.write( date, len(prs_by_author_and_repo), + name=name, author=author, organisation=org, repo=repo, diff --git a/metrics/slack/cli.py b/metrics/slack/cli.py index c0b7481a..442f3c95 100644 --- a/metrics/slack/cli.py +++ b/metrics/slack/cli.py @@ -4,6 +4,7 @@ import click from ..timescaledb import TimescaleDBWriter +from ..timescaledb.tables import SlackTechSupport from .api import get_app, iter_messages @@ -35,8 +36,8 @@ def tech_support(ctx, date, tech_support_channel_id, backfill): messages = iter_messages(app, tech_support_channel_id, date=day) - with TimescaleDBWriter("slack_tech_support", "requests") as writer: + with TimescaleDBWriter(SlackTechSupport) as writer: for date, messages in itertools.groupby( messages, lambda m: datetime.fromtimestamp(float(m["ts"])).date() ): - writer.write(date, len(list(messages))) + writer.write(date, len(list(messages)), name="requests") diff --git a/metrics/timescaledb/tables.py b/metrics/timescaledb/tables.py index 963ee643..6942242f 100644 --- a/metrics/timescaledb/tables.py +++ b/metrics/timescaledb/tables.py @@ -1,19 +1,23 @@ -github_pull_requests = """ -CREATE TABLE IF NOT EXISTS github_pull_requests ( - time TIMESTAMP WITH TIME ZONE NOT NULL, - name TEXT NOT NULL, - value INTEGER NOT NULL, - author TEXT NOT NULL, - organisation TEXT NOT NULL, - repo TEXT NOT NULL, - CONSTRAINT github_pull_requests_must_be_different UNIQUE (time, name, author, repo) -); -""" -slack_tech_support = """ -CREATE TABLE IF NOT EXISTS slack_tech_support ( - time TIMESTAMP WITH TIME ZONE NOT NULL, - name TEXT NOT NULL, - value INTEGER NOT NULL, - CONSTRAINT slack_tech_support_must_be_different UNIQUE (time, name) -); -""" +from sqlalchemy import TIMESTAMP, Column, Integer, MetaData, Table, Text + + +metadata = MetaData() + +GitHubPullRequests = Table( + "github_pull_requests", + metadata, + Column("time", TIMESTAMP(timezone=True), primary_key=True), + Column("name", Text, primary_key=True), + Column("value", Integer), + Column("author", Text, primary_key=True), + Column("organisation", Text), + Column("repo", Text, primary_key=True), +) + +SlackTechSupport = Table( + "slack_tech_support", + metadata, + Column("time", TIMESTAMP(timezone=True), primary_key=True), + Column("name", Text, primary_key=True), + Column("value", Integer), +) diff --git a/metrics/timescaledb/writer.py b/metrics/timescaledb/writer.py index 4d3ba09a..6fe47b68 100644 --- a/metrics/timescaledb/writer.py +++ b/metrics/timescaledb/writer.py @@ -1,10 +1,9 @@ import os from datetime import datetime, time -import psycopg import structlog - -from . import tables +from sqlalchemy import create_engine, inspect, schema, text +from sqlalchemy.dialects.postgresql import insert log = structlog.get_logger() @@ -14,33 +13,31 @@ TIMESCALEDB_URL = os.environ["TIMESCALEDB_URL"].replace( "postgresql", "postgresql+psycopg" ) +engine = create_engine(TIMESCALEDB_URL) -def ensure_table(name): +def ensure_table(table): """ Ensure both the table and hypertable config exist in the database """ - run(getattr(tables, name)) - - run( - "SELECT create_hypertable(%s, 'time', if_not_exists => TRUE);", - [name], - ) - - # ensure the RO grafana user can read the table - run(f"GRANT SELECT ON {name} TO grafanareader") - - -def run(sql, *args): - with psycopg.connect(TIMESCALEDB_URL) as conn: - cursor = conn.cursor() + with engine.begin() as connection: + connection.execute(schema.CreateTable(table, if_not_exists=True)) + + with engine.begin() as connection: + connection.execute( + text( + f"SELECT create_hypertable('{table.name}', 'time', if_not_exists => TRUE);" + ) + ) - return cursor.execute(sql, *args) + # ensure the RO grafana user can read the table + connection.execute(text(f"GRANT SELECT ON {table.name} TO grafanareader")) class TimescaleDBWriter: - def __init__(self, table, key): - self.key = key + inserts = [] + + def __init__(self, table): self.table = table def __enter__(self): @@ -49,7 +46,9 @@ def __enter__(self): return self def __exit__(self, *args): - pass + with engine.begin() as connection: + for stmt in self.inserts: + connection.execute(stmt) def write(self, date, value, **kwargs): # convert date to a timestamp @@ -57,27 +56,20 @@ def write(self, date, value, **kwargs): # UTC? dt = datetime.combine(date, time()) - # insert into the table set at instantiation - # unique by the tables `{name}_must_be_different` and we always want to - # bump the value if that triggers a conflict - # the columns could differ per table… do we want an object to represent tables? - if kwargs: - extra_fields = ", " + ", ".join(kwargs.keys()) - placeholders = ", " + ", ".join(["%s" for k in kwargs.keys()]) - else: - extra_fields = "" - placeholders = "" - sql = f""" - INSERT INTO {self.table} (time, name, value {extra_fields}) - VALUES (%s, %s, %s {placeholders}) - ON CONFLICT ON CONSTRAINT {self.table}_must_be_different DO UPDATE SET value = EXCLUDED.value; - """ - - run(sql, (dt, self.key, value, *kwargs.values())) - - log.debug( - self.key, - date=dt.isoformat(), - value=value, - **kwargs, + # get the primary key name from the given table + constraint = inspect(engine).get_pk_constraint(self.table.name)["name"] + + # TODO: could we put do all the rows at once in the values() call and + # then use EXCLUDED to reference the value in the set_? + insert_stmt = ( + insert(self.table) + .values(time=dt, value=value, **kwargs) + .on_conflict_do_update( + constraint=constraint, + set_={"value": value}, + ) ) + + self.inserts.append(insert_stmt) + + log.debug(insert_stmt) From ba5dba21553cb5b01ac2763ecf162089ae0d90e1 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 15:34:33 +0000 Subject: [PATCH 5/9] Set the db_path default in the option Without a default click passes an explicit None to this arg. --- metrics/github/backfill.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/github/backfill.py b/metrics/github/backfill.py index c7c16757..dbd21a95 100644 --- a/metrics/github/backfill.py +++ b/metrics/github/backfill.py @@ -124,9 +124,9 @@ def next_weekday(d, weekday): @click.command() @click.argument("org") @click.option("--pull-data", is_flag=True, default=False) -@click.option("--db-path", type=str) +@click.option("--db-path", type=str, default="github.db") @click.pass_context -def backfill(ctx, org, pull_data, db_path="github.db"): +def backfill(ctx, org, pull_data, db_path): """Backfill GitHub data for the given GitHub ORG""" if pull_data: # clean up existing db From b94b5244a481e696f6c5d02af5aad4ba7ef6c03a Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 15:35:14 +0000 Subject: [PATCH 6/9] Wrap datetime_from_iso in date_from_iso Sometimes the format of the date string being passed to date_from_iso is actually a datetime. While ideally this wouldn't be the case, it's much easier for the function to do what the programmer wanted than for the programmer to go and fix the input data. --- metrics/tools/dates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/tools/dates.py b/metrics/tools/dates.py index 8c2b535f..abf40f48 100644 --- a/metrics/tools/dates.py +++ b/metrics/tools/dates.py @@ -8,7 +8,7 @@ def date_from_iso(value): if value is None: return date.today() - return date.fromisoformat(value) + return datetime_from_iso(value).date() def datetime_from_iso(value): From 53409abaf04c2bc26716da8ffe4a7dfac63a87b1 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 16:05:03 +0000 Subject: [PATCH 7/9] Always pass dates to the GitHub search API Otherwise we get a malformed search query. --- metrics/github/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/github/cli.py b/metrics/github/cli.py index 6c6122ad..7e969997 100644 --- a/metrics/github/cli.py +++ b/metrics/github/cli.py @@ -55,7 +55,7 @@ def pr_queue(ctx, org, date, days_threshold): def pr_throughput(ctx, org, date, days): """PRs opened in the last number of days given""" end = date.date() - start = date - timedelta(days=days) + start = end - timedelta(days=days) prs = api.prs_opened_in_the_last_N_days(org, start, end) From 2a348ac3f6b7a32ff49da6e5ea6cae207f1c89cf Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 16:28:38 +0000 Subject: [PATCH 8/9] explicitly install greenlet to fix sqlalchemy install in CI sqlalchemy==2.0.23 has a conditional install of greenlet [1] which affects a very complex set of architectures. This has only affected us in CI so far, but breaks pip installs there because we don't have greenlet installed and we're using pip in hashes mode. The install_requires has been updated for the upcoming 2.1 release to avoid this situation, so we can remove this change once that's released. 1: https://github.com/sqlalchemy/sqlalchemy/blob/rel_2_0_23/setup.cfg#L40 --- pyproject.toml | 1 + requirements.prod.txt | 59 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 06216600..fc515ba5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ requires-python = ">=3.11" dependencies = [ "click", "github-to-sqlite", + "greenlet", "requests", "slack-bolt", "sqlalchemy[postgresql_psycopgbinary]", diff --git a/requirements.prod.txt b/requirements.prod.txt index fb92db37..da867bcc 100644 --- a/requirements.prod.txt +++ b/requirements.prod.txt @@ -115,6 +115,65 @@ github-to-sqlite==2.8.3 \ --hash=sha256:16af0e18e4c1002e973b585333207b061137f509a9c42fc2ad48eae797ac3f0b \ --hash=sha256:f5f28b9144bb758f99a923dadd7c3904c84c5786f34cc70c47ba64ee6f7dbe41 # via metrics (pyproject.toml) +greenlet==3.0.1 \ + --hash=sha256:0a02d259510b3630f330c86557331a3b0e0c79dac3d166e449a39363beaae174 \ + --hash=sha256:0b6f9f8ca7093fd4433472fd99b5650f8a26dcd8ba410e14094c1e44cd3ceddd \ + --hash=sha256:100f78a29707ca1525ea47388cec8a049405147719f47ebf3895e7509c6446aa \ + --hash=sha256:1757936efea16e3f03db20efd0cd50a1c86b06734f9f7338a90c4ba85ec2ad5a \ + --hash=sha256:19075157a10055759066854a973b3d1325d964d498a805bb68a1f9af4aaef8ec \ + --hash=sha256:19bbdf1cce0346ef7341705d71e2ecf6f41a35c311137f29b8a2dc2341374565 \ + --hash=sha256:20107edf7c2c3644c67c12205dc60b1bb11d26b2610b276f97d666110d1b511d \ + --hash=sha256:22f79120a24aeeae2b4471c711dcf4f8c736a2bb2fabad2a67ac9a55ea72523c \ + --hash=sha256:2847e5d7beedb8d614186962c3d774d40d3374d580d2cbdab7f184580a39d234 \ + --hash=sha256:28e89e232c7593d33cac35425b58950789962011cc274aa43ef8865f2e11f46d \ + --hash=sha256:329c5a2e5a0ee942f2992c5e3ff40be03e75f745f48847f118a3cfece7a28546 \ + --hash=sha256:337322096d92808f76ad26061a8f5fccb22b0809bea39212cd6c406f6a7060d2 \ + --hash=sha256:3fcc780ae8edbb1d050d920ab44790201f027d59fdbd21362340a85c79066a74 \ + --hash=sha256:41bdeeb552d814bcd7fb52172b304898a35818107cc8778b5101423c9017b3de \ + --hash=sha256:4eddd98afc726f8aee1948858aed9e6feeb1758889dfd869072d4465973f6bfd \ + --hash=sha256:52e93b28db27ae7d208748f45d2db8a7b6a380e0d703f099c949d0f0d80b70e9 \ + --hash=sha256:55d62807f1c5a1682075c62436702aaba941daa316e9161e4b6ccebbbf38bda3 \ + --hash=sha256:5805e71e5b570d490938d55552f5a9e10f477c19400c38bf1d5190d760691846 \ + --hash=sha256:599daf06ea59bfedbec564b1692b0166a0045f32b6f0933b0dd4df59a854caf2 \ + --hash=sha256:60d5772e8195f4e9ebf74046a9121bbb90090f6550f81d8956a05387ba139353 \ + --hash=sha256:696d8e7d82398e810f2b3622b24e87906763b6ebfd90e361e88eb85b0e554dc8 \ + --hash=sha256:6e6061bf1e9565c29002e3c601cf68569c450be7fc3f7336671af7ddb4657166 \ + --hash=sha256:80ac992f25d10aaebe1ee15df45ca0d7571d0f70b645c08ec68733fb7a020206 \ + --hash=sha256:816bd9488a94cba78d93e1abb58000e8266fa9cc2aa9ccdd6eb0696acb24005b \ + --hash=sha256:85d2b77e7c9382f004b41d9c72c85537fac834fb141b0296942d52bf03fe4a3d \ + --hash=sha256:87c8ceb0cf8a5a51b8008b643844b7f4a8264a2c13fcbcd8a8316161725383fe \ + --hash=sha256:89ee2e967bd7ff85d84a2de09df10e021c9b38c7d91dead95b406ed6350c6997 \ + --hash=sha256:8bef097455dea90ffe855286926ae02d8faa335ed8e4067326257cb571fc1445 \ + --hash=sha256:8d11ebbd679e927593978aa44c10fc2092bc454b7d13fdc958d3e9d508aba7d0 \ + --hash=sha256:91e6c7db42638dc45cf2e13c73be16bf83179f7859b07cfc139518941320be96 \ + --hash=sha256:97e7ac860d64e2dcba5c5944cfc8fa9ea185cd84061c623536154d5a89237884 \ + --hash=sha256:990066bff27c4fcf3b69382b86f4c99b3652bab2a7e685d968cd4d0cfc6f67c6 \ + --hash=sha256:9fbc5b8f3dfe24784cee8ce0be3da2d8a79e46a276593db6868382d9c50d97b1 \ + --hash=sha256:ac4a39d1abae48184d420aa8e5e63efd1b75c8444dd95daa3e03f6c6310e9619 \ + --hash=sha256:b2c02d2ad98116e914d4f3155ffc905fd0c025d901ead3f6ed07385e19122c94 \ + --hash=sha256:b2d3337dcfaa99698aa2377c81c9ca72fcd89c07e7eb62ece3f23a3fe89b2ce4 \ + --hash=sha256:b489c36d1327868d207002391f662a1d163bdc8daf10ab2e5f6e41b9b96de3b1 \ + --hash=sha256:b641161c302efbb860ae6b081f406839a8b7d5573f20a455539823802c655f63 \ + --hash=sha256:b8ba29306c5de7717b5761b9ea74f9c72b9e2b834e24aa984da99cbfc70157fd \ + --hash=sha256:b9934adbd0f6e476f0ecff3c94626529f344f57b38c9a541f87098710b18af0a \ + --hash=sha256:ce85c43ae54845272f6f9cd8320d034d7a946e9773c693b27d620edec825e376 \ + --hash=sha256:cf868e08690cb89360eebc73ba4be7fb461cfbc6168dd88e2fbbe6f31812cd57 \ + --hash=sha256:d2905ce1df400360463c772b55d8e2518d0e488a87cdea13dd2c71dcb2a1fa16 \ + --hash=sha256:d57e20ba591727da0c230ab2c3f200ac9d6d333860d85348816e1dca4cc4792e \ + --hash=sha256:d6a8c9d4f8692917a3dc7eb25a6fb337bff86909febe2f793ec1928cd97bedfc \ + --hash=sha256:d923ff276f1c1f9680d32832f8d6c040fe9306cbfb5d161b0911e9634be9ef0a \ + --hash=sha256:daa7197b43c707462f06d2c693ffdbb5991cbb8b80b5b984007de431493a319c \ + --hash=sha256:dbd4c177afb8a8d9ba348d925b0b67246147af806f0b104af4d24f144d461cd5 \ + --hash=sha256:dc4d815b794fd8868c4d67602692c21bf5293a75e4b607bb92a11e821e2b859a \ + --hash=sha256:e9d21aaa84557d64209af04ff48e0ad5e28c5cca67ce43444e939579d085da72 \ + --hash=sha256:ea6b8aa9e08eea388c5f7a276fabb1d4b6b9d6e4ceb12cc477c3d352001768a9 \ + --hash=sha256:eabe7090db68c981fca689299c2d116400b553f4b713266b130cfc9e2aa9c5a9 \ + --hash=sha256:f2f6d303f3dee132b322a14cd8765287b8f86cdc10d2cb6a6fae234ea488888e \ + --hash=sha256:f33f3258aae89da191c6ebaa3bc517c6c4cbc9b9f689e5d8452f7aedbb913fa8 \ + --hash=sha256:f7bfb769f7efa0eefcd039dd19d843a4fbfbac52f1878b1da2ed5793ec9b1a65 \ + --hash=sha256:f89e21afe925fcfa655965ca8ea10f24773a1791400989ff32f467badfe4a064 \ + --hash=sha256:fa24255ae3c0ab67e613556375a4341af04a084bd58764731972bcbc8baeba36 + # via metrics (pyproject.toml) idna==3.4 \ --hash=sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4 \ --hash=sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 From 8e47869da42872ddbf427820189d0d9a04b54ae6 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 13 Nov 2023 16:35:41 +0000 Subject: [PATCH 9/9] Move sqlalchemy engine instance into TimescaleDBWriter instance So we're not instantiating it on import --- metrics/timescaledb/writer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/metrics/timescaledb/writer.py b/metrics/timescaledb/writer.py index 6fe47b68..83a32813 100644 --- a/metrics/timescaledb/writer.py +++ b/metrics/timescaledb/writer.py @@ -13,10 +13,9 @@ TIMESCALEDB_URL = os.environ["TIMESCALEDB_URL"].replace( "postgresql", "postgresql+psycopg" ) -engine = create_engine(TIMESCALEDB_URL) -def ensure_table(table): +def ensure_table(engine, table): """ Ensure both the table and hypertable config exist in the database """ @@ -39,14 +38,15 @@ class TimescaleDBWriter: def __init__(self, table): self.table = table + self.engine = create_engine(TIMESCALEDB_URL) def __enter__(self): - ensure_table(self.table) + ensure_table(self.engine, self.table) return self def __exit__(self, *args): - with engine.begin() as connection: + with self.engine.begin() as connection: for stmt in self.inserts: connection.execute(stmt) @@ -57,7 +57,7 @@ def write(self, date, value, **kwargs): dt = datetime.combine(date, time()) # get the primary key name from the given table - constraint = inspect(engine).get_pk_constraint(self.table.name)["name"] + constraint = inspect(self.engine).get_pk_constraint(self.table.name)["name"] # TODO: could we put do all the rows at once in the values() call and # then use EXCLUDED to reference the value in the set_?