From af55cf1ed2ebe7c5659d72db637f90949b918bf1 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Fri, 1 Dec 2023 11:56:13 +0000 Subject: [PATCH 1/2] Use batched from the local itertools helper module This got left around after a bad rebase. --- metrics/timescaledb/db.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/metrics/timescaledb/db.py b/metrics/timescaledb/db.py index 61cbbcdf..0797990e 100644 --- a/metrics/timescaledb/db.py +++ b/metrics/timescaledb/db.py @@ -1,23 +1,10 @@ -import itertools - import structlog from sqlalchemy import text +from ..tools.iter import batched -log = structlog.get_logger() - - -def batched(iterable, n): - """ - Backport of 3.12's itertools.batched - - https://docs.python.org/3/library/itertools.html#itertools.batched - batched('ABCDEFG', 3) --> ABC DEF G - """ - it = iter(iterable) - while batch := tuple(itertools.islice(it, n)): - yield batch +log = structlog.get_logger() def delete_rows(connection, name, n=10000): From 161e0489924f08388d1e53a6820251699388110d Mon Sep 17 00:00:00 2001 From: George Hickman Date: Fri, 1 Dec 2023 12:00:30 +0000 Subject: [PATCH 2/2] Always backfill slack data --- metrics/slack/api.py | 7 +------ metrics/slack/cli.py | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/metrics/slack/api.py b/metrics/slack/api.py index b5bd6227..2fc181dd 100644 --- a/metrics/slack/api.py +++ b/metrics/slack/api.py @@ -1,5 +1,3 @@ -from datetime import datetime, time, timedelta - from slack_bolt import App @@ -10,11 +8,8 @@ def get_app(signing_secret, token): return App(token=token, signing_secret=signing_secret) -def iter_messages(app, channel_id, date=None): +def iter_messages(app, channel_id): start = end = 0 - if date: - start = datetime.combine(date, time()).timestamp() - end = (datetime.combine(date, time()) + timedelta(days=1)).timestamp() for page in app.client.conversations_history( channel=channel_id, diff --git a/metrics/slack/cli.py b/metrics/slack/cli.py index 442f3c95..4da7a440 100644 --- a/metrics/slack/cli.py +++ b/metrics/slack/cli.py @@ -2,12 +2,18 @@ from datetime import datetime import click +import structlog +from sqlalchemy import create_engine -from ..timescaledb import TimescaleDBWriter +from ..timescaledb import TimescaleDBWriter, drop_tables from ..timescaledb.tables import SlackTechSupport +from ..timescaledb.writer import TIMESCALEDB_URL from .api import get_app, iter_messages +log = structlog.get_logger() + + @click.group() @click.option("--signing-secret", required=True, envvar="SLACK_SIGNING_SECRET") @click.option("--token", required=True, envvar="SLACK_TOKEN") @@ -20,21 +26,22 @@ def slack(ctx, signing_secret, token): @slack.command() -@click.argument("date", type=click.DateTime(), required=False) @click.option( "--tech-support-channel-id", required=True, envvar="SLACK_TECH_SUPPORT_CHANNEL_ID" ) -@click.option("--backfill", is_flag=True) @click.pass_context -def tech_support(ctx, date, tech_support_channel_id, backfill): - if backfill and date: - raise click.BadParameter("--backfill cannot be used with a date") - - day = None if backfill else date.date() - +def tech_support(ctx, tech_support_channel_id): app = get_app(ctx.obj["SLACK_SIGNING_SECRET"], ctx.obj["SLACK_TOKEN"]) - messages = iter_messages(app, tech_support_channel_id, date=day) + messages = iter_messages(app, tech_support_channel_id) + + log.info("Dropping existing slack_* tables") + # TODO: we have this in three places now, can we pull into some kind of + # service wrapper? + engine = create_engine(TIMESCALEDB_URL) + with engine.begin() as connection: + drop_tables(connection, prefix="slack_") + log.info("Dropped existing slack_* tables") with TimescaleDBWriter(SlackTechSupport) as writer: for date, messages in itertools.groupby(