Skip to content

Commit

Permalink
Use correct signatures for Celery Task Hooks
Browse files Browse the repository at this point in the history
This explicitly declares and forwards all the arguments to the `on_success/retry/failure` task hooks, as documented at https://docs.celeryq.dev/en/main/_modules/celery/app/task.html#Task.on_success

The reason is that the Sentry tags set via the `MetricContext` constructor were not making their way to Sentry, possibly because the `kwargs` were misused in the previous hooks and were overwriting those tag values with `None`.

So this should ideally solve that mystery.

As a drive-by change, I also took the liberty of removing all the deprecated statsd `metrics` calls. All of the relevant metrics also have Prometheus equivalents.
  • Loading branch information
Swatinem committed Oct 16, 2024
1 parent 393d765 commit 7844c7a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 63 deletions.
21 changes: 6 additions & 15 deletions helpers/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,39 +68,30 @@ def __init__(
sentry_sdk.set_tag("owner_id", owner_id)
sentry_sdk.set_tag("repo_id", repo_id)
sentry_sdk.set_tag("commit_sha", commit_sha)
transaction = sentry_sdk.get_current_scope().transaction
if transaction is not None:
transaction.set_tag("owner_id", owner_id)
transaction.set_tag("repo_id", repo_id)
transaction.set_tag("commit_sha", commit_sha)

def populate(self):
if self.populated:
return

repo = None
commit = None
dbsession = get_db_session()

if self.repo_id:
if not self.owner_id:
repo = (
dbsession.query(Repository)
self.owner_id = (

Check warning on line 80 in helpers/telemetry.py

View check run for this annotation

Codecov Notifications / codecov/patch

helpers/telemetry.py#L80

Added line #L80 was not covered by tests
dbsession.query(Repository.ownerid)
.filter(Repository.repoid == self.repo_id)
.first()
.first()[0]
)
self.owner_id = repo.ownerid

if self.commit_sha and not self.commit_id:
commit = (
dbsession.query(Commit)
self.commit_id = (
dbsession.query(Commit.id_)
.filter(
Commit.repoid == self.repo_id,
Commit.commitid == self.commit_sha,
)
.first()
.first()[0]
)
self.commit_id = commit.id_

self.populated = True

Expand Down
79 changes: 31 additions & 48 deletions tasks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
log_set_task_id,
log_set_task_name,
)
from helpers.metrics import metrics
from helpers.telemetry import MetricContext, TimeseriesTimer
from helpers.timeseries import timeseries_enabled

Expand All @@ -51,9 +50,7 @@ def on_timeout(self, soft: bool, timeout: int):
res = super().on_timeout(soft, timeout)
if not soft:
REQUEST_HARD_TIMEOUT_COUNTER.labels(task=self.name).inc()
metrics.incr(f"{self.metrics_prefix}.hardtimeout")
REQUEST_TIMEOUT_COUNTER.labels(task=self.name).inc()
metrics.incr(f"{self.metrics_prefix}.timeout")
return res


Expand Down Expand Up @@ -245,20 +242,13 @@ def _emit_queue_metrics(self):
enqueued_time = datetime.fromisoformat(created_timestamp)
now = datetime.now()
delta = now - enqueued_time
metrics.timing(f"{self.metrics_prefix}.time_in_queue", delta)

queue_name = self.request.get("delivery_info", {}).get("routing_key", None)
time_in_queue_timer = TASK_TIME_IN_QUEUE.labels(
task=self.name, queue=queue_name
) # TODO is None a valid label value
time_in_queue_timer.observe(delta.total_seconds())

if queue_name:
metrics.timing(f"worker.queues.{queue_name}.time_in_queue", delta)
metrics.timing(
f"{self.metrics_prefix}.{queue_name}.time_in_queue", delta
)

def run(self, *args, **kwargs):
task = get_current_task()

Expand All @@ -283,35 +273,31 @@ def run(self, *args, **kwargs):
metric_context, f"{self.metrics_prefix}.full_runtime", sync=True
):
with self.task_full_runtime.time(): # Timer isn't tested
with metrics.timer(f"{self.metrics_prefix}.full"):
db_session = get_db_session()
try:
with TimeseriesTimer(
metric_context,
f"{self.metrics_prefix}.core_runtime",
sync=True,
):
with self.task_core_runtime.time(): # Timer isn't tested
with metrics.timer(f"{self.metrics_prefix}.run"):
return self.run_impl(db_session, *args, **kwargs)
except (DataError, IntegrityError):
log.exception(
"Errors related to the constraints of database happened",
extra=dict(task_args=args, task_kwargs=kwargs),
)
db_session.rollback()
self._rollback_django()
self.retry()
except SQLAlchemyError as ex:
self._analyse_error(ex, args, kwargs)
db_session.rollback()
self._rollback_django()
self.retry()
finally:
log_set_task_name(None)
log_set_task_id(None)
self.wrap_up_dbsession(db_session)
self._commit_django()
db_session = get_db_session()
try:
with TimeseriesTimer(
metric_context, f"{self.metrics_prefix}.core_runtime", sync=True
):
with self.task_core_runtime.time(): # Timer isn't tested
return self.run_impl(db_session, *args, **kwargs)
except (DataError, IntegrityError):
log.exception(
"Errors related to the constraints of database happened",
extra=dict(task_args=args, task_kwargs=kwargs),
)
db_session.rollback()
self._rollback_django()
self.retry()
except SQLAlchemyError as ex:
self._analyse_error(ex, args, kwargs)
db_session.rollback()
self._rollback_django()
self.retry()
finally:
log_set_task_name(None)
log_set_task_id(None)
self.wrap_up_dbsession(db_session)
self._commit_django()

def wrap_up_dbsession(self, db_session):
"""
Expand Down Expand Up @@ -352,10 +338,9 @@ def wrap_up_dbsession(self, db_session):
)
get_db_session.remove()

def on_retry(self, *args, **kwargs):
res = super().on_retry(*args, **kwargs)
def on_retry(self, exc, task_id, args, kwargs, einfo):
res = super().on_retry(exc, task_id, args, kwargs, einfo)
self.task_retry_counter.inc()
metrics.incr(f"{self.metrics_prefix}.retries")
metric_context = MetricContext(
commit_sha=kwargs.get("commitid"),
repo_id=kwargs.get("repoid"),
Expand All @@ -364,10 +349,9 @@ def on_retry(self, *args, **kwargs):
metric_context.log_simple_metric(f"{self.metrics_prefix}.retry", 1.0)
return res

def on_success(self, *args, **kwargs):
res = super().on_success(*args, **kwargs)
def on_success(self, retval, task_id, args, kwargs):
res = super().on_success(retval, task_id, args, kwargs)

Check warning on line 353 in tasks/base.py

View check run for this annotation

Codecov Notifications / codecov/patch

tasks/base.py#L353

Added line #L353 was not covered by tests
self.task_success_counter.inc()
metrics.incr(f"{self.metrics_prefix}.successes")
metric_context = MetricContext(
commit_sha=kwargs.get("commitid"),
repo_id=kwargs.get("repoid"),
Expand All @@ -376,13 +360,12 @@ def on_success(self, *args, **kwargs):
metric_context.log_simple_metric(f"{self.metrics_prefix}.success", 1.0)
return res

def on_failure(self, *args, **kwargs):
def on_failure(self, exc, task_id, args, kwargs, einfo):
"""
Includes SoftTimeoutLimitException, for example
"""
res = super().on_failure(*args, **kwargs)
res = super().on_failure(exc, task_id, args, kwargs, einfo)

Check warning on line 367 in tasks/base.py

View check run for this annotation

Codecov Notifications / codecov/patch

tasks/base.py#L367

Added line #L367 was not covered by tests
self.task_failure_counter.inc()
metrics.incr(f"{self.metrics_prefix}.failures")
metric_context = MetricContext(
commit_sha=kwargs.get("commitid"),
repo_id=kwargs.get("repoid"),
Expand Down

0 comments on commit 7844c7a

Please sign in to comment.