From ed925d56f837d40aeece32e5e501fe89ed48a739 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 13 Nov 2019 22:59:59 +0200 Subject: [PATCH 01/24] enforce hard limits on non-responsive work horses by workers --- redash/cli/rq.py | 3 ++- redash/worker.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/redash/cli/rq.py b/redash/cli/rq.py index c103d6707d..eb917c8d39 100644 --- a/redash/cli/rq.py +++ b/redash/cli/rq.py @@ -5,10 +5,11 @@ from click import argument from flask.cli import AppGroup -from rq import Connection, Worker +from rq import Connection from sqlalchemy.orm import configure_mappers from redash import rq_redis_connection +from redash.worker import HardTimeLimitingWorker as Worker from redash.schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions manager = AppGroup(help="RQ management commands.") diff --git a/redash/worker.py b/redash/worker.py index 18da7de7a2..99431f0ecf 100644 --- a/redash/worker.py +++ b/redash/worker.py @@ -1,4 +1,5 @@ - +import errno +import os from datetime import timedelta from functools import partial @@ -9,8 +10,11 @@ from celery.signals import worker_process_init from celery.utils.log import get_logger -from rq import get_current_job +from rq import Worker, get_current_job +from rq.utils import utcnow from rq.decorators import job as rq_job +from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException +from rq.job import JobStatus from redash import create_app, extensions, settings, redis_connection, rq_redis_connection from redash.metrics import celery as celery_metrics # noqa @@ -93,3 +97,62 @@ def add_periodic_tasks(sender, **kwargs): for params in extensions.periodic_tasks.values(): # Add it to Celery's periodic task registry, too. sender.add_periodic_task(**params) + + +class HardTimeLimitingWorker(Worker): + grace_period = 15 + + def monitor_work_horse(self, job): + """The worker will monitor the work horse and make sure that it + either executes successfully or the status of the job is set to + failed + """ + monitor_started = utcnow() + while True: + try: + with UnixSignalDeathPenalty(self.job_monitoring_interval, HorseMonitorTimeoutException): + retpid, ret_val = os.waitpid(self._horse_pid, 0) + break + except HorseMonitorTimeoutException: + # Horse has not exited yet and is still running. + # Send a heartbeat to keep the worker alive. + self.heartbeat(self.job_monitoring_interval + 5) + + seconds_under_monitor = (utcnow() - monitor_started).seconds + if seconds_under_monitor > job.timeout + self.grace_period: + self.log.warning('Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. ' + 'Killing the work horse.', job.id, job.timeout, self.grace_period) + self.kill_horse() + except OSError as e: + # In case we encountered an OSError due to EINTR (which is + # caused by a SIGINT or SIGTERM signal during + # os.waitpid()), we simply ignore it and enter the next + # iteration of the loop, waiting for the child to end. In + # any other case, this is some other unexpected OS error, + # which we don't want to catch, so we re-raise those ones. + if e.errno != errno.EINTR: + raise + # Send a heartbeat to keep the worker alive. + self.heartbeat() + + if ret_val == os.EX_OK: # The process exited normally. 
+ return + job_status = job.get_status() + if job_status is None: # Job completed and its ttl has expired + return + if job_status not in [JobStatus.FINISHED, JobStatus.FAILED]: + + if not job.ended_at: + job.ended_at = utcnow() + + # Unhandled failure: move the job to the failed queue + self.log.warning(( + 'Moving job to FailedJobRegistry ' + '(work-horse terminated unexpectedly; waitpid returned {})' + ).format(ret_val)) + + self.handle_job_failure( + job, + exc_string="Work-horse process was terminated unexpectedly " + "(waitpid returned %s)" % ret_val + ) From 859fe2ab776140cbd765cf920b6d1f3a2d9d027e Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 13 Nov 2019 23:30:01 +0200 Subject: [PATCH 02/24] move differences from Worker to helper methods to help make the specialization clearer --- redash/worker.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/redash/worker.py b/redash/worker.py index 99431f0ecf..d7a103218e 100644 --- a/redash/worker.py +++ b/redash/worker.py @@ -102,12 +102,21 @@ def add_periodic_tasks(sender, **kwargs): class HardTimeLimitingWorker(Worker): grace_period = 15 + def soft_limit_exceeded(self, job): + seconds_under_monitor = (utcnow() - self.monitor_started).seconds + return seconds_under_monitor > job.timeout + self.grace_period + + def enforce_hard_limit(self, job): + self.log.warning('Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. ' + 'Killing the work horse.', job.id, job.timeout, self.grace_period) + self.kill_horse() + def monitor_work_horse(self, job): """The worker will monitor the work horse and make sure that it either executes successfully or the status of the job is set to failed """ - monitor_started = utcnow() + self.monitor_started = utcnow() while True: try: with UnixSignalDeathPenalty(self.job_monitoring_interval, HorseMonitorTimeoutException): @@ -118,11 +127,8 @@ def monitor_work_horse(self, job): # Send a heartbeat to keep the worker alive. self.heartbeat(self.job_monitoring_interval + 5) - seconds_under_monitor = (utcnow() - monitor_started).seconds - if seconds_under_monitor > job.timeout + self.grace_period: - self.log.warning('Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. 
' - 'Killing the work horse.', job.id, job.timeout, self.grace_period) - self.kill_horse() + if self.soft_limit_exceeded(job): + self.enforce_hard_limit(job) except OSError as e: # In case we encountered an OSError due to EINTR (which is # caused by a SIGINT or SIGTERM signal during From d12010095caff3dc884b2491294dc65fd98b16c9 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Thu, 14 Nov 2019 20:06:27 +0000 Subject: [PATCH 03/24] move HardLimitingWorker to redash/tasks --- redash/cli/rq.py | 2 +- redash/tasks/__init__.py | 1 + redash/tasks/hard_limiting_worker.py | 70 +++++++++++++++++++++++++++ redash/worker.py | 72 +--------------------------- 4 files changed, 73 insertions(+), 72 deletions(-) create mode 100644 redash/tasks/hard_limiting_worker.py diff --git a/redash/cli/rq.py b/redash/cli/rq.py index eb917c8d39..7ad2759e09 100644 --- a/redash/cli/rq.py +++ b/redash/cli/rq.py @@ -9,7 +9,7 @@ from sqlalchemy.orm import configure_mappers from redash import rq_redis_connection -from redash.worker import HardTimeLimitingWorker as Worker +from redash.tasks import HardLimitingWorker as Worker from redash.schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions manager = AppGroup(help="RQ management commands.") diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index 6f0d590514..cf195e64ea 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -3,3 +3,4 @@ refresh_schemas, cleanup_query_results, empty_schedules) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors +from .hard_limiting_worker import HardLimitingWorker \ No newline at end of file diff --git a/redash/tasks/hard_limiting_worker.py b/redash/tasks/hard_limiting_worker.py new file mode 100644 index 0000000000..d95a5e045b --- /dev/null +++ b/redash/tasks/hard_limiting_worker.py @@ -0,0 +1,70 @@ +import errno +import os +from rq import Worker, get_current_job +from rq.utils import utcnow +from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException +from rq.job import JobStatus + +class HardLimitingWorker(Worker): + grace_period = 15 + + def soft_limit_exceeded(self, job): + seconds_under_monitor = (utcnow() - self.monitor_started).seconds + return seconds_under_monitor > job.timeout + self.grace_period + + def enforce_hard_limit(self, job): + self.log.warning('Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. ' + 'Killing the work horse.', job.id, job.timeout, self.grace_period) + self.kill_horse() + + def monitor_work_horse(self, job): + """The worker will monitor the work horse and make sure that it + either executes successfully or the status of the job is set to + failed + """ + self.monitor_started = utcnow() + while True: + try: + with UnixSignalDeathPenalty(self.job_monitoring_interval, HorseMonitorTimeoutException): + retpid, ret_val = os.waitpid(self._horse_pid, 0) + break + except HorseMonitorTimeoutException: + # Horse has not exited yet and is still running. + # Send a heartbeat to keep the worker alive. + self.heartbeat(self.job_monitoring_interval + 5) + + if self.soft_limit_exceeded(job): + self.enforce_hard_limit(job) + except OSError as e: + # In case we encountered an OSError due to EINTR (which is + # caused by a SIGINT or SIGTERM signal during + # os.waitpid()), we simply ignore it and enter the next + # iteration of the loop, waiting for the child to end. 
In + # any other case, this is some other unexpected OS error, + # which we don't want to catch, so we re-raise those ones. + if e.errno != errno.EINTR: + raise + # Send a heartbeat to keep the worker alive. + self.heartbeat() + + if ret_val == os.EX_OK: # The process exited normally. + return + job_status = job.get_status() + if job_status is None: # Job completed and its ttl has expired + return + if job_status not in [JobStatus.FINISHED, JobStatus.FAILED]: + + if not job.ended_at: + job.ended_at = utcnow() + + # Unhandled failure: move the job to the failed queue + self.log.warning(( + 'Moving job to FailedJobRegistry ' + '(work-horse terminated unexpectedly; waitpid returned {})' + ).format(ret_val)) + + self.handle_job_failure( + job, + exc_string="Work-horse process was terminated unexpectedly " + "(waitpid returned %s)" % ret_val + ) diff --git a/redash/worker.py b/redash/worker.py index d7a103218e..13ec1b18e9 100644 --- a/redash/worker.py +++ b/redash/worker.py @@ -1,5 +1,3 @@ -import errno -import os from datetime import timedelta from functools import partial @@ -10,11 +8,8 @@ from celery.signals import worker_process_init from celery.utils.log import get_logger -from rq import Worker, get_current_job -from rq.utils import utcnow +from rq import get_current_job from rq.decorators import job as rq_job -from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException -from rq.job import JobStatus from redash import create_app, extensions, settings, redis_connection, rq_redis_connection from redash.metrics import celery as celery_metrics # noqa @@ -97,68 +92,3 @@ def add_periodic_tasks(sender, **kwargs): for params in extensions.periodic_tasks.values(): # Add it to Celery's periodic task registry, too. sender.add_periodic_task(**params) - - -class HardTimeLimitingWorker(Worker): - grace_period = 15 - - def soft_limit_exceeded(self, job): - seconds_under_monitor = (utcnow() - self.monitor_started).seconds - return seconds_under_monitor > job.timeout + self.grace_period - - def enforce_hard_limit(self, job): - self.log.warning('Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. ' - 'Killing the work horse.', job.id, job.timeout, self.grace_period) - self.kill_horse() - - def monitor_work_horse(self, job): - """The worker will monitor the work horse and make sure that it - either executes successfully or the status of the job is set to - failed - """ - self.monitor_started = utcnow() - while True: - try: - with UnixSignalDeathPenalty(self.job_monitoring_interval, HorseMonitorTimeoutException): - retpid, ret_val = os.waitpid(self._horse_pid, 0) - break - except HorseMonitorTimeoutException: - # Horse has not exited yet and is still running. - # Send a heartbeat to keep the worker alive. - self.heartbeat(self.job_monitoring_interval + 5) - - if self.soft_limit_exceeded(job): - self.enforce_hard_limit(job) - except OSError as e: - # In case we encountered an OSError due to EINTR (which is - # caused by a SIGINT or SIGTERM signal during - # os.waitpid()), we simply ignore it and enter the next - # iteration of the loop, waiting for the child to end. In - # any other case, this is some other unexpected OS error, - # which we don't want to catch, so we re-raise those ones. - if e.errno != errno.EINTR: - raise - # Send a heartbeat to keep the worker alive. - self.heartbeat() - - if ret_val == os.EX_OK: # The process exited normally. 
- return - job_status = job.get_status() - if job_status is None: # Job completed and its ttl has expired - return - if job_status not in [JobStatus.FINISHED, JobStatus.FAILED]: - - if not job.ended_at: - job.ended_at = utcnow() - - # Unhandled failure: move the job to the failed queue - self.log.warning(( - 'Moving job to FailedJobRegistry ' - '(work-horse terminated unexpectedly; waitpid returned {})' - ).format(ret_val)) - - self.handle_job_failure( - job, - exc_string="Work-horse process was terminated unexpectedly " - "(waitpid returned %s)" % ret_val - ) From 1fa6abf219483cf364d762f35b4b31450fa56d59 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Sat, 16 Nov 2019 21:22:56 +0000 Subject: [PATCH 04/24] move schedule.py to /tasks --- redash/cli/rq.py | 3 +-- redash/tasks/__init__.py | 3 ++- redash/{ => tasks}/schedule.py | 0 tests/{ => tasks}/test_schedule.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename redash/{ => tasks}/schedule.py (100%) rename tests/{ => tasks}/test_schedule.py (92%) diff --git a/redash/cli/rq.py b/redash/cli/rq.py index 7ad2759e09..b9a450eb44 100644 --- a/redash/cli/rq.py +++ b/redash/cli/rq.py @@ -9,8 +9,7 @@ from sqlalchemy.orm import configure_mappers from redash import rq_redis_connection -from redash.tasks import HardLimitingWorker as Worker -from redash.schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions +from redash.tasks import HardLimitingWorker as Worker, rq_scheduler, schedule_periodic_jobs, periodic_job_definitions manager = AppGroup(help="RQ management commands.") diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index cf195e64ea..f622eb481b 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -3,4 +3,5 @@ refresh_schemas, cleanup_query_results, empty_schedules) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors -from .hard_limiting_worker import HardLimitingWorker \ No newline at end of file +from .hard_limiting_worker import * +from .schedule import * \ No newline at end of file diff --git a/redash/schedule.py b/redash/tasks/schedule.py similarity index 100% rename from redash/schedule.py rename to redash/tasks/schedule.py diff --git a/tests/test_schedule.py b/tests/tasks/test_schedule.py similarity index 92% rename from tests/test_schedule.py rename to tests/tasks/test_schedule.py index 13a38b11ac..5a8b1d19b5 100644 --- a/tests/test_schedule.py +++ b/tests/tasks/test_schedule.py @@ -1,7 +1,7 @@ from unittest import TestCase from mock import patch, ANY -from redash.schedule import rq_scheduler, schedule_periodic_jobs +from redash.tasks.schedule import rq_scheduler, schedule_periodic_jobs class TestSchedule(TestCase): def setUp(self): @@ -26,7 +26,7 @@ def foo(): pass schedule_periodic_jobs([{"func": foo, "interval": 60}]) - with patch('redash.schedule.rq_scheduler.schedule') as schedule: + with patch('redash.tasks.rq_scheduler.schedule') as schedule: schedule_periodic_jobs([{"func": foo, "interval": 60}]) schedule.assert_not_called() From 1251b9b24ea23e2bd4233daa28b352e5bed0e073 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Sat, 16 Nov 2019 21:30:19 +0000 Subject: [PATCH 05/24] explain the motivation for HardLimitingWorker --- redash/tasks/hard_limiting_worker.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/redash/tasks/hard_limiting_worker.py b/redash/tasks/hard_limiting_worker.py index d95a5e045b..f85957e18b 100644 --- a/redash/tasks/hard_limiting_worker.py +++ b/redash/tasks/hard_limiting_worker.py 
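The hunk below adds a docstring spelling out the motivation: the work horse enforces its own (soft) timeout with an alarm, but a fully blocked horse never sees the alarm, so the parent worker polls it and kills it once the timeout plus a grace period has passed. A simplified, standalone illustration of that parent-side pattern (not Redash code; the timeout values and function names are made up for the sketch):

    import os
    import signal
    import time

    # Illustrative numbers; the real worker uses job.timeout plus a 15s grace period.
    SOFT_TIMEOUT = 5
    GRACE_PERIOD = 2

    def run_job():
        # Stand-in for a work horse that is blocked and ignores its own alarm.
        while True:
            time.sleep(1)

    def monitor(horse_pid, started_at):
        # Parent-side hard limit: poll the child and kill it past the deadline.
        while True:
            pid, status = os.waitpid(horse_pid, os.WNOHANG)
            if pid == horse_pid:
                return status  # the child exited on its own
            if time.time() - started_at > SOFT_TIMEOUT + GRACE_PERIOD:
                os.kill(horse_pid, signal.SIGKILL)  # hard limit kicks in
                os.waitpid(horse_pid, 0)
                return None
            time.sleep(1)

    if __name__ == "__main__":
        pid = os.fork()
        if pid == 0:
            run_job()
        else:
            monitor(pid, time.time())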
@@ -5,6 +5,19 @@ from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException from rq.job import JobStatus +""" +RQ's work horses enforce time limits by setting a timed alarm and stopping jobs +when they reach their time limits. However, the work horse may be entirely blocked +and may not respond to the alarm interrupt. Since respecting timeouts is critical +in Redash (if we don't respect them, workers may be infinitely stuck and as a result, +service may be denied for other queries), we enforce two time limits: +1. A soft time limit, enforced by the work horse +2. A hard time limit, enforced by the parent worker + +The HardLimitingWorker class changes the default monitoring behavior of the default +RQ Worker by checking if the work horse is still busy with the job, even after +it should have timed out (+ a grace period of 15s). If it does, it kills the work horse. +""" class HardLimitingWorker(Worker): grace_period = 15 From 4ae624b2b3f7a76b7138390d98aaafec4be5792a Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Sun, 17 Nov 2019 09:04:53 +0000 Subject: [PATCH 06/24] pleasing CodeClimate --- redash/tasks/__init__.py | 2 +- redash/tasks/hard_limiting_worker.py | 26 +++++++++++++------------- redash/tasks/schedule.py | 1 + 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index f622eb481b..b75d01caad 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -4,4 +4,4 @@ from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors from .hard_limiting_worker import * -from .schedule import * \ No newline at end of file +from .schedule import * diff --git a/redash/tasks/hard_limiting_worker.py b/redash/tasks/hard_limiting_worker.py index f85957e18b..56e0788981 100644 --- a/redash/tasks/hard_limiting_worker.py +++ b/redash/tasks/hard_limiting_worker.py @@ -5,20 +5,20 @@ from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException from rq.job import JobStatus -""" -RQ's work horses enforce time limits by setting a timed alarm and stopping jobs -when they reach their time limits. However, the work horse may be entirely blocked -and may not respond to the alarm interrupt. Since respecting timeouts is critical -in Redash (if we don't respect them, workers may be infinitely stuck and as a result, -service may be denied for other queries), we enforce two time limits: -1. A soft time limit, enforced by the work horse -2. A hard time limit, enforced by the parent worker - -The HardLimitingWorker class changes the default monitoring behavior of the default -RQ Worker by checking if the work horse is still busy with the job, even after -it should have timed out (+ a grace period of 15s). If it does, it kills the work horse. -""" class HardLimitingWorker(Worker): + """ + RQ's work horses enforce time limits by setting a timed alarm and stopping jobs + when they reach their time limits. However, the work horse may be entirely blocked + and may not respond to the alarm interrupt. Since respecting timeouts is critical + in Redash (if we don't respect them, workers may be infinitely stuck and as a result, + service may be denied for other queries), we enforce two time limits: + 1. A soft time limit, enforced by the work horse + 2. 
A hard time limit, enforced by the parent worker + + The HardLimitingWorker class changes the default monitoring behavior of the default + RQ Worker by checking if the work horse is still busy with the job, even after + it should have timed out (+ a grace period of 15s). If it does, it kills the work horse. + """ grace_period = 15 def soft_limit_exceeded(self, job): diff --git a/redash/tasks/schedule.py b/redash/tasks/schedule.py index 8a577e4c39..b1a527cf42 100644 --- a/redash/tasks/schedule.py +++ b/redash/tasks/schedule.py @@ -21,6 +21,7 @@ queue_name="periodic", interval=5) + def job_id(kwargs): metadata = kwargs.copy() metadata['func'] = metadata['func'].__name__ From 9cfd453a1f7138ff5c1427a5a15b0fa5c8848b85 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Sun, 17 Nov 2019 09:18:26 +0000 Subject: [PATCH 07/24] pleasing CodeClimate --- redash/tasks/hard_limiting_worker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redash/tasks/hard_limiting_worker.py b/redash/tasks/hard_limiting_worker.py index 56e0788981..c4bb30166b 100644 --- a/redash/tasks/hard_limiting_worker.py +++ b/redash/tasks/hard_limiting_worker.py @@ -5,6 +5,7 @@ from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException from rq.job import JobStatus + class HardLimitingWorker(Worker): """ RQ's work horses enforce time limits by setting a timed alarm and stopping jobs @@ -14,7 +15,7 @@ class HardLimitingWorker(Worker): service may be denied for other queries), we enforce two time limits: 1. A soft time limit, enforced by the work horse 2. A hard time limit, enforced by the parent worker - + The HardLimitingWorker class changes the default monitoring behavior of the default RQ Worker by checking if the work horse is still busy with the job, even after it should have timed out (+ a grace period of 15s). If it does, it kills the work horse. From 9c855cbe61be2b347f2da426b37fd74faa25e891 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Mon, 18 Nov 2019 06:40:03 +0000 Subject: [PATCH 08/24] port query execution to RQ --- redash/handlers/query_results.py | 4 +- redash/tasks/queries/execution.py | 100 ++++++++++++++---------------- 2 files changed, 50 insertions(+), 54 deletions(-) diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index 28d458863e..4a0d393485 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -317,12 +317,12 @@ def get(self, job_id, query_id=None): """ Retrieve info about a running query job. """ - job = QueryTask(job_id=job_id) + job = QueryTask(job_id) return {'job': job.to_dict()} def delete(self, job_id): """ Cancel a query job in progress. 
""" - job = QueryTask(job_id=job_id) + job = QueryTask(job_id) job.cancel() diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index eead672319..2cc5c72a61 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -2,19 +2,20 @@ import signal import time import redis -from celery.exceptions import SoftTimeLimitExceeded, TimeLimitExceeded -from celery.result import AsyncResult -from celery.utils.log import get_task_logger from six import text_type -from redash import models, redis_connection, settings +from rq import Queue, get_current_job +from rq.job import Job, JobStatus +from rq.timeouts import JobTimeoutException + +from redash import models, rq_redis_connection, redis_connection, settings from redash.query_runner import InterruptException from redash.tasks.alerts import check_alerts_for_query from redash.tasks.failure_report import track_failure from redash.utils import gen_query_hash, json_dumps, utcnow -from redash.worker import celery +from redash.worker import celery, get_job_logger -logger = get_task_logger(__name__) +logger = get_job_logger(__name__) TIMEOUT_MESSAGE = "Query exceeded Redash query execution time limit." @@ -29,51 +30,48 @@ def _unlock(query_hash, data_source_id): class QueryTask(object): # TODO: this is mapping to the old Job class statuses. Need to update the client side and remove this STATUSES = { - 'PENDING': 1, - 'STARTED': 2, - 'SUCCESS': 3, - 'FAILURE': 4, - 'REVOKED': 4 + JobStatus.QUEUED: 1, + JobStatus.STARTED: 2, + JobStatus.FINISHED: 3, + JobStatus.FAILED: 4, + 'REVOKED': 4 # TODO - find a representation for cancelled jobs in RQ } - def __init__(self, job_id=None, async_result=None): - if async_result: - self._async_result = async_result - else: - self._async_result = AsyncResult(job_id, app=celery) + def __init__(self, job): + self._job = job if isinstance(job, Job) else Job.fetch(job, connection=rq_redis_connection) @property def id(self): - return self._async_result.id + return self._job.id def to_dict(self): - task_info = self._async_result._get_task_meta() - result, task_status = task_info['result'], task_info['status'] - if task_status == 'STARTED': - updated_at = result.get('start_time', 0) + job_status = self.rq_status + if job_status == JobStatus.STARTED: + updated_at = self._job.started_at or 0 else: updated_at = 0 - status = self.STATUSES[task_status] + status = self.STATUSES[job_status] - if isinstance(result, (TimeLimitExceeded, SoftTimeLimitExceeded)): + result = self._job.result + if isinstance(result, JobTimeoutException): error = TIMEOUT_MESSAGE status = 4 elif isinstance(result, Exception): error = str(result) status = 4 - elif task_status == 'REVOKED': + elif job_status == 'REVOKED': # TODO - find a representation for cancelled jobs in RQ error = 'Query execution cancelled.' 
else: error = '' - if task_status == 'SUCCESS' and not error: + if job_status == JobStatus.FINISHED and not error: query_result_id = result else: query_result_id = None return { - 'id': self._async_result.id, + 'id': self.id, 'updated_at': updated_at, 'status': status, 'error': error, @@ -82,17 +80,17 @@ def to_dict(self): @property def is_cancelled(self): - return self._async_result.status == 'REVOKED' + return self.rq_status == 'REVOKED' # TODO - find a representation for cancelled jobs in RQ @property - def celery_status(self): - return self._async_result.status + def rq_status(self): + return self._job.get_status() def ready(self): - return self._async_result.ready() + return self._job.is_finished def cancel(self): - return self._async_result.revoke(terminate=True, signal='SIGINT') + self._job.cancel() def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}): @@ -111,10 +109,10 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query if job_id: logging.info("[%s] Found existing job: %s", query_hash, job_id) - job = QueryTask(job_id=job_id) + job = QueryTask(job_id) if job.ready(): - logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.celery_status) + logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.rq_status) redis_connection.delete(_job_lock_id(query_hash, data_source.id)) job = None @@ -128,7 +126,6 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query queue_name = data_source.queue_name scheduled_query_id = None - args = (query, data_source.id, metadata, user_id, scheduled_query_id, is_api_key) argsrepr = json_dumps({ 'org_id': data_source.org_id, 'data_source_id': data_source.id, @@ -139,13 +136,17 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query }) time_limit = settings.dynamic_settings.query_time_limit(scheduled_query, user_id, data_source.org_id) + metadata['Queue'] = queue_name - result = execute_query.apply_async(args=args, - argsrepr=argsrepr, - queue=queue_name, - soft_time_limit=time_limit) + # TODO - what's argsrepr? + queue = Queue(queue_name, connection=rq_redis_connection) + result = queue.enqueue(execute_query, query, data_source.id, metadata, + user_id=user_id, + scheduled_query_id=scheduled_query_id, + is_api_key=is_api_key, + job_timeout=time_limit) - job = QueryTask(async_result=result) + job = QueryTask(result) logging.info("[%s] Created new job: %s", query_hash, job.id) pipe.set(_job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME) pipe.execute() @@ -184,12 +185,10 @@ def _resolve_user(user_id, is_api_key, query_id): return None -# We could have created this as a celery.Task derived class, and act as the task itself. But this might result in weird -# issues as the task class created once per process, so decided to have a plain object instead. 
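The enqueue path above now goes through plain RQ: enqueue_query holds a per-query Redis lock, enqueues execute_query with job_timeout set to the query time limit, and wraps the returned job in QueryTask so the API layer can poll it by id. A minimal sketch of the underlying RQ calls (a local Redis and an importable my_tasks module are assumed purely for illustration):

    from redis import Redis
    from rq import Queue
    from rq.job import Job

    from my_tasks import add  # hypothetical task, e.g. def add(a, b): return a + b

    redis_conn = Redis()  # assumes redis://localhost:6379
    queue = Queue("queries", connection=redis_conn)

    # job_timeout plays the role of the per-query time limit passed by enqueue_query.
    job = queue.enqueue(add, 1, 2, job_timeout=60)
    print(job.id, job.get_status())

    # Elsewhere (e.g. a job-status endpoint) the same job is re-fetched by id:
    same_job = Job.fetch(job.id, connection=redis_conn)
    print(same_job.get_status(), same_job.result)  # result stays None until a worker finishes it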
class QueryExecutor(object): - def __init__(self, task, query, data_source_id, user_id, is_api_key, metadata, + def __init__(self, query, data_source_id, user_id, is_api_key, metadata, scheduled_query): - self.task = task + self.job = get_current_job() self.query = query self.data_source_id = data_source_id self.metadata = metadata @@ -217,7 +216,7 @@ def run(self): try: data, error = query_runner.run_query(annotated_query, self.user) except Exception as e: - if isinstance(e, SoftTimeLimitExceeded): + if isinstance(e, JobTimeoutException): error = TIMEOUT_MESSAGE else: error = text_type(e) @@ -262,9 +261,8 @@ def run(self): return result def _annotate_query(self, query_runner): - self.metadata['Task ID'] = self.task.request.id + self.metadata['Task ID'] = self.job.id self.metadata['Query Hash'] = self.query_hash - self.metadata['Queue'] = self.task.request.delivery_info['routing_key'] self.metadata['Scheduled'] = self.scheduled_query is not None return query_runner.annotate_query(self.query, self.metadata) @@ -274,8 +272,8 @@ def _log_progress(self, state): "task=execute_query state=%s query_hash=%s type=%s ds_id=%d " "task_id=%s queue=%s query_id=%s username=%s", state, self.query_hash, self.data_source.type, self.data_source.id, - self.task.request.id, - self.task.request.delivery_info['routing_key'], + self.job.id, + self.metadata.get('Queue', 'unknown'), self.metadata.get('Query ID', 'unknown'), self.metadata.get('Username', 'unknown')) @@ -286,13 +284,11 @@ def _load_data_source(self): # user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted # jobs before the upgrade to this version. -@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True) -def execute_query(self, query, data_source_id, metadata, user_id=None, +def execute_query(query, data_source_id, metadata, user_id=None, scheduled_query_id=None, is_api_key=False): if scheduled_query_id is not None: scheduled_query = models.Query.query.get(scheduled_query_id) else: scheduled_query = None - return QueryExecutor(self, query, data_source_id, user_id, is_api_key, metadata, - scheduled_query).run() + return QueryExecutor(query, data_source_id, user_id, is_api_key, metadata, scheduled_query).run() From 0c8f0b440f9fc1d2f0e37d1a209afef1ffd90f77 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Mon, 18 Nov 2019 06:57:58 +0000 Subject: [PATCH 09/24] get rid of argsrepr --- redash/tasks/queries/execution.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 2cc5c72a61..4093f5542d 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -126,19 +126,9 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query queue_name = data_source.queue_name scheduled_query_id = None - argsrepr = json_dumps({ - 'org_id': data_source.org_id, - 'data_source_id': data_source.id, - 'enqueue_time': time.time(), - 'scheduled': scheduled_query_id is not None, - 'query_id': metadata.get('Query ID'), - 'user_id': user_id - }) - time_limit = settings.dynamic_settings.query_time_limit(scheduled_query, user_id, data_source.org_id) metadata['Queue'] = queue_name - # TODO - what's argsrepr? 
queue = Queue(queue_name, connection=rq_redis_connection) result = queue.enqueue(execute_query, query, data_source.id, metadata, user_id=user_id, From 768f0f610e467ba7b321815ff4dfcff4af6b448b Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Tue, 26 Nov 2019 13:10:56 +0000 Subject: [PATCH 10/24] avoid star imports --- redash/tasks/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index b75d01caad..e51b55b52c 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -3,5 +3,5 @@ refresh_schemas, cleanup_query_results, empty_schedules) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors -from .hard_limiting_worker import * -from .schedule import * +from .hard_limiting_worker import HardLimitingWorker +from .schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions From edd656fef72a2af5b860e7488bbd3dcb6a06e997 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Thu, 28 Nov 2019 14:40:12 +0000 Subject: [PATCH 11/24] allow queries to be cancelled in RQ --- redash/cli/rq.py | 4 ++-- redash/tasks/__init__.py | 2 +- redash/tasks/hard_limiting_worker.py | 35 ++++++++++++++++++++++++++-- redash/tasks/queries/execution.py | 16 ++++++------- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/redash/cli/rq.py b/redash/cli/rq.py index 1547f4fb5f..ebf9cc5604 100644 --- a/redash/cli/rq.py +++ b/redash/cli/rq.py @@ -30,10 +30,10 @@ def worker(queues): configure_mappers() if not queues: - queues = ['periodic', 'emails', 'default', 'schemas'] + queues = ['queries', 'periodic', 'emails', 'default', 'schemas'] with Connection(rq_redis_connection): - w = Worker(queues, log_job_description=False) + w = Worker(queues, log_job_description=False, job_monitoring_interval=5) w.work() diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index e51b55b52c..9d3023a4ab 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -3,5 +3,5 @@ refresh_schemas, cleanup_query_results, empty_schedules) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors -from .hard_limiting_worker import HardLimitingWorker +from .hard_limiting_worker import HardLimitingWorker, CancellableQueue, CancellableJob from .schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions diff --git a/redash/tasks/hard_limiting_worker.py b/redash/tasks/hard_limiting_worker.py index c4bb30166b..b037c743fd 100644 --- a/redash/tasks/hard_limiting_worker.py +++ b/redash/tasks/hard_limiting_worker.py @@ -1,9 +1,29 @@ import errno import os -from rq import Worker, get_current_job +import signal +import time +from rq import Worker, Queue, get_current_job from rq.utils import utcnow from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException -from rq.job import JobStatus +from rq.job import Job, JobStatus + + +class CancellableJob(Job): + def cancel(self, pipeline=None): + # TODO - add tests that verify that queued jobs are removed from queue and running jobs are actively cancelled + if self.is_started: + self.meta['cancelled'] = True + self.save_meta() + + super().cancel(pipeline=pipeline) + + @property + def is_cancelled(self): + return self.meta.get('cancelled', False) + + +class CancellableQueue(Queue): + job_class = CancellableJob class HardLimitingWorker(Worker): @@ -21,6 +41,12 @@ class HardLimitingWorker(Worker): it should have timed out (+ a grace period of 15s). 
If it does, it kills the work horse. """ grace_period = 15 + queue_class = CancellableQueue + job_class = CancellableJob + + def stop_executing_job(self, job): + os.kill(self.horse_pid, signal.SIGINT) + self.log.warning('Job %s has been cancelled.', job.id) def soft_limit_exceeded(self, job): seconds_under_monitor = (utcnow() - self.monitor_started).seconds @@ -47,6 +73,11 @@ def monitor_work_horse(self, job): # Send a heartbeat to keep the worker alive. self.heartbeat(self.job_monitoring_interval + 5) + job.refresh() + + if job.is_cancelled: + self.stop_executing_job(job) + if self.soft_limit_exceeded(job): self.enforce_hard_limit(job) except OSError as e: diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 4093f5542d..c86d8cfc8d 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -4,12 +4,13 @@ import redis from six import text_type -from rq import Queue, get_current_job +from rq import get_current_job from rq.job import Job, JobStatus from rq.timeouts import JobTimeoutException from redash import models, rq_redis_connection, redis_connection, settings from redash.query_runner import InterruptException +from redash.tasks.hard_limiting_worker import CancellableQueue, CancellableJob from redash.tasks.alerts import check_alerts_for_query from redash.tasks.failure_report import track_failure from redash.utils import gen_query_hash, json_dumps, utcnow @@ -33,12 +34,11 @@ class QueryTask(object): JobStatus.QUEUED: 1, JobStatus.STARTED: 2, JobStatus.FINISHED: 3, - JobStatus.FAILED: 4, - 'REVOKED': 4 # TODO - find a representation for cancelled jobs in RQ + JobStatus.FAILED: 4 } def __init__(self, job): - self._job = job if isinstance(job, Job) else Job.fetch(job, connection=rq_redis_connection) + self._job = job if isinstance(job, Job) else CancellableJob.fetch(job, connection=rq_redis_connection) @property def id(self): @@ -57,10 +57,10 @@ def to_dict(self): if isinstance(result, JobTimeoutException): error = TIMEOUT_MESSAGE status = 4 - elif isinstance(result, Exception): + elif isinstance(result, Exception): # TODO - verify exception messages are returned error = str(result) status = 4 - elif job_status == 'REVOKED': # TODO - find a representation for cancelled jobs in RQ + elif self.is_cancelled: error = 'Query execution cancelled.' 
else: error = '' @@ -80,7 +80,7 @@ def to_dict(self): @property def is_cancelled(self): - return self.rq_status == 'REVOKED' # TODO - find a representation for cancelled jobs in RQ + return self._job.is_cancelled @property def rq_status(self): @@ -129,7 +129,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query time_limit = settings.dynamic_settings.query_time_limit(scheduled_query, user_id, data_source.org_id) metadata['Queue'] = queue_name - queue = Queue(queue_name, connection=rq_redis_connection) + queue = CancellableQueue(queue_name, connection=rq_redis_connection) result = queue.enqueue(execute_query, query, data_source.id, metadata, user_id=user_id, scheduled_query_id=scheduled_query_id, From 1d8af9c22b849ddc56bd6c547e1c8ff4964ee010 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Thu, 28 Nov 2019 15:10:17 +0000 Subject: [PATCH 12/24] return QueryExecutionErrors as job results --- redash/tasks/queries/execution.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index c86d8cfc8d..f15825fdf2 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -281,4 +281,7 @@ def execute_query(query, data_source_id, metadata, user_id=None, else: scheduled_query = None - return QueryExecutor(query, data_source_id, user_id, is_api_key, metadata, scheduled_query).run() + try: + return QueryExecutor(query, data_source_id, user_id, is_api_key, metadata, scheduled_query).run() + except QueryExecutionError as e: + return e From a96ee82046d421fb5c03c9cc35fc9697af5d35cf Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Mon, 2 Dec 2019 10:46:44 +0000 Subject: [PATCH 13/24] fix TestTaskEnqueue and QueryExecutorTests --- redash/tasks/queries/execution.py | 14 +++--- tests/tasks/test_queries.py | 83 ++++++++++++++++--------------- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index f15825fdf2..88da0346b6 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -60,7 +60,7 @@ def to_dict(self): elif isinstance(result, Exception): # TODO - verify exception messages are returned error = str(result) status = 4 - elif self.is_cancelled: + elif self.cancelled: error = 'Query execution cancelled.' 
else: error = '' @@ -78,17 +78,18 @@ def to_dict(self): 'query_result_id': query_result_id, } - @property - def is_cancelled(self): - return self._job.is_cancelled - @property def rq_status(self): return self._job.get_status() + @property def ready(self): return self._job.is_finished + @property + def cancelled(self): + return self._job.is_cancelled + def cancel(self): self._job.cancel() @@ -111,7 +112,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query job = QueryTask(job_id) - if job.ready(): + if job.ready: logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.rq_status) redis_connection.delete(_job_lock_id(query_hash, data_source.id)) job = None @@ -284,4 +285,5 @@ def execute_query(query, data_source_id, metadata, user_id=None, try: return QueryExecutor(query, data_source_id, user_id, is_api_key, metadata, scheduled_query).run() except QueryExecutionError as e: + models.db.session.rollback() return e diff --git a/tests/tasks/test_queries.py b/tests/tasks/test_queries.py index ea8efc7126..e496e6df2c 100644 --- a/tests/tasks/test_queries.py +++ b/tests/tasks/test_queries.py @@ -3,61 +3,70 @@ import uuid import mock +from mock import patch from tests import BaseTestCase -from redash import redis_connection, models +from redash import redis_connection, rq_redis_connection, models from redash.utils import json_dumps from redash.query_runner.pg import PostgreSQL from redash.tasks.queries.execution import QueryExecutionError, enqueue_query, execute_query +from redash.tasks import CancellableJob -FakeResult = namedtuple('FakeResult', 'id') +FakeResult = namedtuple('FakeResult', 'id ready') -def gen_hash(*args, **kwargs): - return FakeResult(uuid.uuid4().hex) +def generate_query_task(*args, **kwargs): + if any(args): + job_id = args[0] if isinstance(args[0], str) else args[0].id + else: + job_id = create_job().id + return FakeResult(job_id, False) + +def create_job(*args, **kwargs): + return CancellableJob(connection=rq_redis_connection) + + +@patch('redash.tasks.queries.execution.QueryTask', side_effect=generate_query_task) +@patch('redash.tasks.queries.execution.CancellableQueue.enqueue', side_effect=create_job) class TestEnqueueTask(BaseTestCase): - def test_multiple_enqueue_of_same_query(self): + def test_multiple_enqueue_of_same_query(self, enqueue, _): query = self.factory.create_query() - execute_query.apply_async = mock.MagicMock(side_effect=gen_hash) - + enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - - self.assertEqual(1, execute_query.apply_async.call_count) + + self.assertEqual(1, enqueue.call_count) @mock.patch('redash.settings.dynamic_settings.query_time_limit', return_value=60) - def test_limits_query_time(self, _): + def test_limits_query_time(self, _, enqueue, __): query = self.factory.create_query() - execute_query.apply_async = mock.MagicMock(side_effect=gen_hash) enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - _, kwargs = execute_query.apply_async.call_args - self.assertEqual(60, kwargs.get('soft_time_limit')) + _, kwargs = enqueue.call_args + self.assertEqual(60, kwargs.get('job_timeout')) - def 
test_multiple_enqueue_of_different_query(self): + def test_multiple_enqueue_of_different_query(self, enqueue, _): query = self.factory.create_query() - execute_query.apply_async = mock.MagicMock(side_effect=gen_hash) enqueue_query(query.query_text, query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) enqueue_query(query.query_text + '2', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) enqueue_query(query.query_text + '3', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) - self.assertEqual(3, execute_query.apply_async.call_count) + self.assertEqual(3, enqueue.call_count) +@patch('redash.tasks.queries.execution.get_current_job', side_effect=generate_query_task) class QueryExecutorTests(BaseTestCase): - - def test_success(self): + def test_success(self, _): """ ``execute_query`` invokes the query runner and stores a query result. """ - cm = mock.patch("celery.app.task.Context.delivery_info", {'routing_key': 'test'}) - with cm, mock.patch.object(PostgreSQL, "run_query") as qr: + with mock.patch.object(PostgreSQL, "run_query") as qr: query_result_data = {"columns": [], "rows": []} qr.return_value = (json_dumps(query_result_data), None) result_id = execute_query("SELECT 1, 2", self.factory.data_source.id, {}) @@ -65,14 +74,12 @@ def test_success(self): result = models.QueryResult.query.get(result_id) self.assertEqual(result.data, query_result_data) - def test_success_scheduled(self): + def test_success_scheduled(self, _): """ Scheduled queries remember their latest results. """ - cm = mock.patch("celery.app.task.Context.delivery_info", - {'routing_key': 'test'}) q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) - with cm, mock.patch.object(PostgreSQL, "run_query") as qr: + with mock.patch.object(PostgreSQL, "run_query") as qr: qr.return_value = ([1, 2], None) result_id = execute_query( "SELECT 1, 2", @@ -83,43 +90,41 @@ def test_success_scheduled(self): result = models.QueryResult.query.get(result_id) self.assertEqual(q.latest_query_data, result) - def test_failure_scheduled(self): + def test_failure_scheduled(self, _): """ Scheduled queries that fail have their failure recorded. """ - cm = mock.patch("celery.app.task.Context.delivery_info", - {'routing_key': 'test'}) q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) - with cm, mock.patch.object(PostgreSQL, "run_query") as qr: + with mock.patch.object(PostgreSQL, "run_query") as qr: qr.side_effect = ValueError("broken") - with self.assertRaises(QueryExecutionError): - execute_query("SELECT 1, 2", self.factory.data_source.id, {}, + + result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id) + self.assertTrue(isinstance(result, QueryExecutionError)) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 1) - with self.assertRaises(QueryExecutionError): - execute_query("SELECT 1, 2", self.factory.data_source.id, {}, + + result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id) + self.assertTrue(isinstance(result, QueryExecutionError)) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 2) - def test_success_after_failure(self): + def test_success_after_failure(self, _): """ Query execution success resets the failure counter. 
""" - cm = mock.patch("celery.app.task.Context.delivery_info", - {'routing_key': 'test'}) q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) - with cm, mock.patch.object(PostgreSQL, "run_query") as qr: + with mock.patch.object(PostgreSQL, "run_query") as qr: qr.side_effect = ValueError("broken") - with self.assertRaises(QueryExecutionError): - execute_query("SELECT 1, 2", + result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id) + self.assertTrue(isinstance(result, QueryExecutionError)) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 1) - with cm, mock.patch.object(PostgreSQL, "run_query") as qr: + with mock.patch.object(PostgreSQL, "run_query") as qr: qr.return_value = ([1, 2], None) execute_query("SELECT 1, 2", self.factory.data_source.id, {}, From 37a74ea403301a21db9c36b3f05fbdc26cbd3530 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 4 Dec 2019 19:40:02 +0000 Subject: [PATCH 14/24] remove Celery monitoring --- client/app/components/admin/CeleryStatus.jsx | 95 ------------- client/app/components/admin/Layout.jsx | 3 - client/app/components/admin/RQStatus.jsx | 25 ++++ client/app/pages/admin/Jobs.jsx | 3 +- client/app/pages/admin/Tasks.jsx | 135 ------------------- redash/handlers/admin.py | 18 +-- redash/monitor.py | 79 +---------- 7 files changed, 28 insertions(+), 330 deletions(-) delete mode 100644 client/app/components/admin/CeleryStatus.jsx delete mode 100644 client/app/pages/admin/Tasks.jsx diff --git a/client/app/components/admin/CeleryStatus.jsx b/client/app/components/admin/CeleryStatus.jsx deleted file mode 100644 index 959b10e6ce..0000000000 --- a/client/app/components/admin/CeleryStatus.jsx +++ /dev/null @@ -1,95 +0,0 @@ -import { map } from 'lodash'; -import React from 'react'; -import PropTypes from 'prop-types'; - -import Table from 'antd/lib/table'; -import Card from 'antd/lib/card'; -import Spin from 'antd/lib/spin'; -import Badge from 'antd/lib/badge'; -import { Columns } from '@/components/items-list/components/ItemsTable'; - -// CounterCard - -export function CounterCard({ title, value, loading }) { - return ( - - - {title} -
{value}
-
-
- ); -} - -CounterCard.propTypes = { - title: PropTypes.string.isRequired, - value: PropTypes.oneOfType([PropTypes.number, PropTypes.string]), - loading: PropTypes.bool.isRequired, -}; - -CounterCard.defaultProps = { - value: '', -}; - -// Tables - -const commonColumns = [ - { title: 'Worker Name', dataIndex: 'worker' }, - { title: 'PID', dataIndex: 'worker_pid' }, - { title: 'Queue', dataIndex: 'queue' }, - Columns.custom((value) => { - if (value === 'active') { - return Active; - } - return {value}; - }, { - title: 'State', - dataIndex: 'state', - }), - Columns.timeAgo({ title: 'Start Time', dataIndex: 'start_time' }), -]; - -const queryColumns = commonColumns.concat([ - Columns.timeAgo({ title: 'Enqueue Time', dataIndex: 'enqueue_time' }), - { title: 'Query ID', dataIndex: 'query_id' }, - { title: 'Org ID', dataIndex: 'org_id' }, - { title: 'Data Source ID', dataIndex: 'data_source_id' }, - { title: 'User ID', dataIndex: 'user_id' }, - { title: 'Scheduled', dataIndex: 'scheduled' }, -]); - -const queuesColumns = map( - ['Name', 'Active', 'Reserved', 'Waiting'], - c => ({ title: c, dataIndex: c.toLowerCase() }), -); - -const TablePropTypes = { - loading: PropTypes.bool.isRequired, - items: PropTypes.arrayOf(PropTypes.object).isRequired, -}; - -export function QueuesTable({ loading, items }) { - return ( - - ); -} - -QueuesTable.propTypes = TablePropTypes; - -export function QueriesTable({ loading, items }) { - return ( -
- ); -} - -QueriesTable.propTypes = TablePropTypes; diff --git a/client/app/components/admin/Layout.jsx b/client/app/components/admin/Layout.jsx index 36f2f7e70a..4ac9621b27 100644 --- a/client/app/components/admin/Layout.jsx +++ b/client/app/components/admin/Layout.jsx @@ -15,9 +15,6 @@ export default function Layout({ activeTab, children }) { System Status}> {(activeTab === 'system_status') ? children : null} - Celery Status}> - {(activeTab === 'tasks') ? children : null} - RQ Status}> {(activeTab === 'jobs') ? children : null} diff --git a/client/app/components/admin/RQStatus.jsx b/client/app/components/admin/RQStatus.jsx index 88c937dd17..ffc14a679a 100644 --- a/client/app/components/admin/RQStatus.jsx +++ b/client/app/components/admin/RQStatus.jsx @@ -3,9 +3,34 @@ import React from 'react'; import PropTypes from 'prop-types'; import Badge from 'antd/lib/badge'; +import Card from 'antd/lib/card'; +import Spin from 'antd/lib/spin'; import Table from 'antd/lib/table'; import { Columns } from '@/components/items-list/components/ItemsTable'; +// CounterCard + +export function CounterCard({ title, value, loading }) { + return ( + + + {title} +
{value}
+
+
+ ); +} + +CounterCard.propTypes = { + title: PropTypes.string.isRequired, + value: PropTypes.oneOfType([PropTypes.number, PropTypes.string]), + loading: PropTypes.bool.isRequired, +}; + +CounterCard.defaultProps = { + value: '', +}; + // Tables const otherJobsColumns = [ diff --git a/client/app/pages/admin/Jobs.jsx b/client/app/pages/admin/Jobs.jsx index f04efc39bc..60fa0a9a60 100644 --- a/client/app/pages/admin/Jobs.jsx +++ b/client/app/pages/admin/Jobs.jsx @@ -6,8 +6,7 @@ import Alert from 'antd/lib/alert'; import Tabs from 'antd/lib/tabs'; import * as Grid from 'antd/lib/grid'; import Layout from '@/components/admin/Layout'; -import { CounterCard } from '@/components/admin/CeleryStatus'; -import { WorkersTable, QueuesTable, OtherJobsTable } from '@/components/admin/RQStatus'; +import { CounterCard, WorkersTable, QueuesTable, OtherJobsTable } from '@/components/admin/RQStatus'; import { $http, $location, $rootScope } from '@/services/ng'; import recordEvent from '@/services/recordEvent'; diff --git a/client/app/pages/admin/Tasks.jsx b/client/app/pages/admin/Tasks.jsx deleted file mode 100644 index 566d5c94a3..0000000000 --- a/client/app/pages/admin/Tasks.jsx +++ /dev/null @@ -1,135 +0,0 @@ -import { values, each } from 'lodash'; -import moment from 'moment'; -import React from 'react'; -import { react2angular } from 'react2angular'; - -import Alert from 'antd/lib/alert'; -import Tabs from 'antd/lib/tabs'; -import * as Grid from 'antd/lib/grid'; -import Layout from '@/components/admin/Layout'; -import { CounterCard, QueuesTable, QueriesTable } from '@/components/admin/CeleryStatus'; - -import { $http } from '@/services/ng'; -import recordEvent from '@/services/recordEvent'; -import { routesToAngularRoutes } from '@/lib/utils'; - -// Converting name coming from API to the one the UI expects. -// TODO: update the UI components to use `waiting_in_queue` instead of `waiting`. -function stateName(state) { - if (state === 'waiting_in_queue') { - return 'waiting'; - } - return state; -} - -class Tasks extends React.Component { - state = { - isLoading: true, - error: null, - - queues: [], - queries: [], - counters: { active: 0, reserved: 0, waiting: 0 }, - }; - - componentDidMount() { - recordEvent('view', 'page', 'admin/tasks'); - $http - .get('/api/admin/queries/tasks') - .then(({ data }) => this.processTasks(data.tasks)) - .catch(error => this.handleError(error)); - } - - componentWillUnmount() { - // Ignore data after component unmounted - this.processTasks = () => {}; - this.handleError = () => {}; - } - - processTasks = (tasks) => { - const queues = {}; - const queries = []; - - const counters = { active: 0, reserved: 0, waiting: 0 }; - - each(tasks, (task) => { - task.state = stateName(task.state); - queues[task.queue] = queues[task.queue] || { name: task.queue, active: 0, reserved: 0, waiting: 0 }; - queues[task.queue][task.state] += 1; - - if (task.enqueue_time) { - task.enqueue_time = moment(task.enqueue_time * 1000.0); - } - if (task.start_time) { - task.start_time = moment(task.start_time * 1000.0); - } - - counters[task.state] += 1; - - if (task.task_name === 'redash.tasks.execute_query') { - queries.push(task); - } - }); - - this.setState({ isLoading: false, queues: values(queues), queries, counters }); - }; - - handleError = (error) => { - this.setState({ isLoading: false, error }); - }; - - render() { - const { isLoading, error, queues, queries, counters } = this.state; - - return ( - -
- {error && ( - - )} - - {!error && ( - - - - - - - - - - - - - - - - - - - - - - - )} -
-
- ); - } -} - -export default function init(ngModule) { - ngModule.component('pageTasks', react2angular(Tasks)); - - return routesToAngularRoutes([ - { - path: '/admin/queries/tasks', - title: 'Celery Status', - key: 'tasks', - }, - ], { - template: '', - }); -} - -init.init = true; diff --git a/redash/handlers/admin.py b/redash/handlers/admin.py index ba8dd30ec9..62eeaf1309 100644 --- a/redash/handlers/admin.py +++ b/redash/handlers/admin.py @@ -8,7 +8,7 @@ from redash.permissions import require_super_admin from redash.serializers import QuerySerializer from redash.utils import json_loads -from redash.monitor import celery_tasks, rq_status +from redash.monitor import rq_status @routes.route('/api/admin/queries/outdated', methods=['GET']) @@ -38,22 +38,6 @@ def outdated_queries(): return json_response(response) -@routes.route('/api/admin/queries/tasks', methods=['GET']) -@require_super_admin -@login_required -def queries_tasks(): - record_event(current_org, current_user._get_current_object(), { - 'action': 'list', - 'object_type': 'celery_tasks' - }) - - response = { - 'tasks': celery_tasks(), - } - - return json_response(response) - - @routes.route('/api/admin/queries/rq_status', methods=['GET']) @require_super_admin @login_required diff --git a/redash/monitor.py b/redash/monitor.py index cd9471dff0..b062db295d 100644 --- a/redash/monitor.py +++ b/redash/monitor.py @@ -4,7 +4,6 @@ from redash import redis_connection, rq_redis_connection, __version__, settings from redash.models import db, DataSource, Query, QueryResult, Dashboard, Widget from redash.utils import json_loads -from redash.worker import celery from rq import Queue, Worker from rq.job import Job from rq.registry import StartedJobRegistry @@ -26,17 +25,8 @@ def get_object_counts(): return status -def get_celery_queues(): - queue_names = db.session.query(DataSource.queue_name).distinct() - scheduled_queue_names = db.session.query(DataSource.scheduled_queue_name).distinct() - query = db.session.execute(union_all(queue_names, scheduled_queue_names)) - - return ['celery'] + [row[0] for row in query] - - def get_queues_status(): - return {**{queue: {'size': redis_connection.llen(queue)} for queue in get_celery_queues()}, - **{queue.name: {'size': len(queue)} for queue in Queue.all(connection=rq_redis_connection)}} + return {queue.name: {'size': len(queue)} for queue in Queue.all(connection=rq_redis_connection)} def get_db_sizes(): @@ -67,73 +57,6 @@ def get_status(): return status -def get_waiting_in_queue(queue_name): - jobs = [] - for raw in redis_connection.lrange(queue_name, 0, -1): - job = json_loads(raw) - try: - args = json_loads(job['headers']['argsrepr']) - if args.get('query_id') == 'adhoc': - args['query_id'] = None - except ValueError: - args = {} - - job_row = { - 'state': 'waiting_in_queue', - 'task_name': job['headers']['task'], - 'worker': None, - 'worker_pid': None, - 'start_time': None, - 'task_id': job['headers']['id'], - 'queue': job['properties']['delivery_info']['routing_key'] - } - - job_row.update(args) - jobs.append(job_row) - - return jobs - - -def parse_tasks(task_lists, state): - rows = [] - - for task in itertools.chain(*task_lists.values()): - task_row = { - 'state': state, - 'task_name': task['name'], - 'worker': task['hostname'], - 'queue': task['delivery_info']['routing_key'], - 'task_id': task['id'], - 'worker_pid': task['worker_pid'], - 'start_time': task['time_start'], - } - - if task['name'] == 'redash.tasks.execute_query': - try: - args = json_loads(task['args']) - except ValueError: - args = {} 
- - if args.get('query_id') == 'adhoc': - args['query_id'] = None - - task_row.update(args) - - rows.append(task_row) - - return rows - - -def celery_tasks(): - tasks = parse_tasks(celery.control.inspect().active(), 'active') - tasks += parse_tasks(celery.control.inspect().reserved(), 'reserved') - - for queue_name in get_celery_queues(): - tasks += get_waiting_in_queue(queue_name) - - return tasks - - def fetch_jobs(queue, job_ids): return [{ 'id': job.id, From faf516603de82f16b5342617298b1646b85d8a87 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 4 Dec 2019 21:23:14 +0000 Subject: [PATCH 15/24] get rid of QueryTask and use RQ jobs directly (with a job serializer) --- redash/handlers/query_results.py | 14 +++--- redash/serializers/__init__.py | 41 +++++++++++++++ redash/tasks/__init__.py | 2 +- redash/tasks/queries/__init__.py | 2 +- redash/tasks/queries/execution.py | 83 +++---------------------------- tests/tasks/test_queries.py | 8 +-- 6 files changed, 62 insertions(+), 88 deletions(-) diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index 4a0d393485..fa2d060b37 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -4,16 +4,16 @@ from flask import make_response, request from flask_login import current_user from flask_restful import abort -from redash import models, settings +from redash import models, settings, rq_redis_connection from redash.handlers.base import BaseResource, get_object_or_404, record_event from redash.permissions import (has_access, not_view_only, require_access, require_permission, view_only) -from redash.tasks import QueryTask from redash.tasks.queries import enqueue_query from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow, to_filename) from redash.models.parameterized_query import (ParameterizedQuery, InvalidParameterError, QueryDetachedFromDataSourceError, dropdown_values) -from redash.serializers import serialize_query_result, serialize_query_result_to_csv, serialize_query_result_to_xlsx +from redash.serializers import (serialize_query_result, serialize_query_result_to_csv, + serialize_query_result_to_xlsx, serialize_job) def error_response(message, http_status=400): @@ -67,7 +67,7 @@ def run_query(query, parameters, data_source, query_id, max_age=0): "Username": repr(current_user) if current_user.is_api_user() else current_user.email, "Query ID": query_id }) - return {'job': job.to_dict()} + return serialize_job(job) def get_download_filename(query_result, query, filetype): @@ -317,12 +317,12 @@ def get(self, job_id, query_id=None): """ Retrieve info about a running query job. """ - job = QueryTask(job_id) - return {'job': job.to_dict()} + job = CancellableJob.fetch(job_id, connection=rq_redis_connection) + return serialize_job(job) def delete(self, job_id): """ Cancel a query job in progress. 
""" - job = QueryTask(job_id) + job = CancellableJob.fetch(job_id, connection=rq_redis_connection) job.cancel() diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index 0dc8b6a9f9..7f92d7540a 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -6,12 +6,15 @@ from funcy import project from flask_login import current_user +from rq.job import JobStatus +from rq.timeouts import JobTimeoutException from redash import models from redash.permissions import has_access, view_only from redash.utils import json_loads from redash.models.parameterized_query import ParameterizedQuery + from .query_result import serialize_query_result, serialize_query_result_to_csv, serialize_query_result_to_xlsx @@ -228,3 +231,41 @@ def serialize_dashboard(obj, with_widgets=False, user=None, with_favorite_state= d['is_favorite'] = models.Favorite.is_favorite(current_user.id, obj) return d + + +def serialize_job(job): + # TODO: this is mapping to the old Job class statuses. Need to update the client side and remove this + STATUSES = { + JobStatus.QUEUED: 1, + JobStatus.STARTED: 2, + JobStatus.FINISHED: 3, + JobStatus.FAILED: 4 + } + + job_status = job.get_status() + if job_status == JobStatus.STARTED: + updated_at = job.started_at or 0 + else: + updated_at = 0 + + status = STATUSES[job_status] + + if isinstance(job.result, Exception): + error = str(job.result) + status = 4 + elif job.is_cancelled: + error = 'Query execution cancelled.' + else: + error = '' + + successfully_finished = job_status == JobStatus.FINISHED and not error + + return { + 'job': { + 'id': job.id, + 'updated_at': updated_at, + 'status': status, + 'error': error, + 'query_result_id': result if successfully_finished else None + } + } diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index 9d3023a4ab..b1ed19c336 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -1,5 +1,5 @@ from .general import record_event, version_check, send_mail, sync_user_details, purge_failed_jobs -from .queries import (QueryTask, enqueue_query, execute_query, refresh_queries, +from .queries import (enqueue_query, execute_query, refresh_queries, refresh_schemas, cleanup_query_results, empty_schedules) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors diff --git a/redash/tasks/queries/__init__.py b/redash/tasks/queries/__init__.py index 120091952a..f5ee125105 100644 --- a/redash/tasks/queries/__init__.py +++ b/redash/tasks/queries/__init__.py @@ -1,2 +1,2 @@ from .maintenance import refresh_queries, refresh_schemas, cleanup_query_results, empty_schedules -from .execution import QueryTask, execute_query, enqueue_query +from .execution import execute_query, enqueue_query diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 88da0346b6..6101f44c3e 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -5,7 +5,7 @@ from six import text_type from rq import get_current_job -from rq.job import Job, JobStatus +from rq.job import Job from rq.timeouts import JobTimeoutException from redash import models, rq_redis_connection, redis_connection, settings @@ -28,72 +28,6 @@ def _unlock(query_hash, data_source_id): redis_connection.delete(_job_lock_id(query_hash, data_source_id)) -class QueryTask(object): - # TODO: this is mapping to the old Job class statuses. 
Need to update the client side and remove this - STATUSES = { - JobStatus.QUEUED: 1, - JobStatus.STARTED: 2, - JobStatus.FINISHED: 3, - JobStatus.FAILED: 4 - } - - def __init__(self, job): - self._job = job if isinstance(job, Job) else CancellableJob.fetch(job, connection=rq_redis_connection) - - @property - def id(self): - return self._job.id - - def to_dict(self): - job_status = self.rq_status - if job_status == JobStatus.STARTED: - updated_at = self._job.started_at or 0 - else: - updated_at = 0 - - status = self.STATUSES[job_status] - - result = self._job.result - if isinstance(result, JobTimeoutException): - error = TIMEOUT_MESSAGE - status = 4 - elif isinstance(result, Exception): # TODO - verify exception messages are returned - error = str(result) - status = 4 - elif self.cancelled: - error = 'Query execution cancelled.' - else: - error = '' - - if job_status == JobStatus.FINISHED and not error: - query_result_id = result - else: - query_result_id = None - - return { - 'id': self.id, - 'updated_at': updated_at, - 'status': status, - 'error': error, - 'query_result_id': query_result_id, - } - - @property - def rq_status(self): - return self._job.get_status() - - @property - def ready(self): - return self._job.is_finished - - @property - def cancelled(self): - return self._job.is_cancelled - - def cancel(self): - self._job.cancel() - - def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}): query_hash = gen_query_hash(query) logging.info("Inserting job for %s with metadata=%s", query_hash, metadata) @@ -110,9 +44,9 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query if job_id: logging.info("[%s] Found existing job: %s", query_hash, job_id) - job = QueryTask(job_id) + job = CancellableJob.fetch(job_id, connection=rq_redis_connection) - if job.ready: + if job.is_finished: logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.rq_status) redis_connection.delete(_job_lock_id(query_hash, data_source.id)) job = None @@ -131,13 +65,12 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query metadata['Queue'] = queue_name queue = CancellableQueue(queue_name, connection=rq_redis_connection) - result = queue.enqueue(execute_query, query, data_source.id, metadata, - user_id=user_id, - scheduled_query_id=scheduled_query_id, - is_api_key=is_api_key, - job_timeout=time_limit) + job = queue.enqueue(execute_query, query, data_source.id, metadata, + user_id=user_id, + scheduled_query_id=scheduled_query_id, + is_api_key=is_api_key, + job_timeout=time_limit) - job = QueryTask(result) logging.info("[%s] Created new job: %s", query_hash, job.id) pipe.set(_job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME) pipe.execute() diff --git a/tests/tasks/test_queries.py b/tests/tasks/test_queries.py index e496e6df2c..007c695e7b 100644 --- a/tests/tasks/test_queries.py +++ b/tests/tasks/test_queries.py @@ -13,10 +13,10 @@ from redash.tasks import CancellableJob -FakeResult = namedtuple('FakeResult', 'id ready') +FakeResult = namedtuple('FakeResult', 'id is_finished') -def generate_query_task(*args, **kwargs): +def fetch_job(*args, **kwargs): if any(args): job_id = args[0] if isinstance(args[0], str) else args[0].id else: @@ -29,7 +29,7 @@ def create_job(*args, **kwargs): return CancellableJob(connection=rq_redis_connection) -@patch('redash.tasks.queries.execution.QueryTask', side_effect=generate_query_task) +@patch('redash.tasks.queries.execution.CancellableJob.fetch', 
side_effect=fetch_job) @patch('redash.tasks.queries.execution.CancellableQueue.enqueue', side_effect=create_job) class TestEnqueueTask(BaseTestCase): def test_multiple_enqueue_of_same_query(self, enqueue, _): @@ -60,7 +60,7 @@ def test_multiple_enqueue_of_different_query(self, enqueue, _): self.assertEqual(3, enqueue.call_count) -@patch('redash.tasks.queries.execution.get_current_job', side_effect=generate_query_task) +@patch('redash.tasks.queries.execution.get_current_job', side_effect=fetch_job) class QueryExecutorTests(BaseTestCase): def test_success(self, _): """ From 66f3db912b4a580cb416aed13aa93be1670ebeb0 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 4 Dec 2019 21:36:43 +0000 Subject: [PATCH 16/24] Revert "remove Celery monitoring" This reverts commit 37a74ea403301a21db9c36b3f05fbdc26cbd3530. --- client/app/components/admin/CeleryStatus.jsx | 95 +++++++++++++ client/app/components/admin/Layout.jsx | 3 + client/app/components/admin/RQStatus.jsx | 25 ---- client/app/pages/admin/Jobs.jsx | 3 +- client/app/pages/admin/Tasks.jsx | 135 +++++++++++++++++++ redash/handlers/admin.py | 18 ++- redash/monitor.py | 79 ++++++++++- 7 files changed, 330 insertions(+), 28 deletions(-) create mode 100644 client/app/components/admin/CeleryStatus.jsx create mode 100644 client/app/pages/admin/Tasks.jsx diff --git a/client/app/components/admin/CeleryStatus.jsx b/client/app/components/admin/CeleryStatus.jsx new file mode 100644 index 0000000000..959b10e6ce --- /dev/null +++ b/client/app/components/admin/CeleryStatus.jsx @@ -0,0 +1,95 @@ +import { map } from 'lodash'; +import React from 'react'; +import PropTypes from 'prop-types'; + +import Table from 'antd/lib/table'; +import Card from 'antd/lib/card'; +import Spin from 'antd/lib/spin'; +import Badge from 'antd/lib/badge'; +import { Columns } from '@/components/items-list/components/ItemsTable'; + +// CounterCard + +export function CounterCard({ title, value, loading }) { + return ( + + + {title} +
{value}
+
+
+ ); +} + +CounterCard.propTypes = { + title: PropTypes.string.isRequired, + value: PropTypes.oneOfType([PropTypes.number, PropTypes.string]), + loading: PropTypes.bool.isRequired, +}; + +CounterCard.defaultProps = { + value: '', +}; + +// Tables + +const commonColumns = [ + { title: 'Worker Name', dataIndex: 'worker' }, + { title: 'PID', dataIndex: 'worker_pid' }, + { title: 'Queue', dataIndex: 'queue' }, + Columns.custom((value) => { + if (value === 'active') { + return Active; + } + return {value}; + }, { + title: 'State', + dataIndex: 'state', + }), + Columns.timeAgo({ title: 'Start Time', dataIndex: 'start_time' }), +]; + +const queryColumns = commonColumns.concat([ + Columns.timeAgo({ title: 'Enqueue Time', dataIndex: 'enqueue_time' }), + { title: 'Query ID', dataIndex: 'query_id' }, + { title: 'Org ID', dataIndex: 'org_id' }, + { title: 'Data Source ID', dataIndex: 'data_source_id' }, + { title: 'User ID', dataIndex: 'user_id' }, + { title: 'Scheduled', dataIndex: 'scheduled' }, +]); + +const queuesColumns = map( + ['Name', 'Active', 'Reserved', 'Waiting'], + c => ({ title: c, dataIndex: c.toLowerCase() }), +); + +const TablePropTypes = { + loading: PropTypes.bool.isRequired, + items: PropTypes.arrayOf(PropTypes.object).isRequired, +}; + +export function QueuesTable({ loading, items }) { + return ( +
+ ); +} + +QueuesTable.propTypes = TablePropTypes; + +export function QueriesTable({ loading, items }) { + return ( +
+ ); +} + +QueriesTable.propTypes = TablePropTypes; diff --git a/client/app/components/admin/Layout.jsx b/client/app/components/admin/Layout.jsx index 4ac9621b27..36f2f7e70a 100644 --- a/client/app/components/admin/Layout.jsx +++ b/client/app/components/admin/Layout.jsx @@ -15,6 +15,9 @@ export default function Layout({ activeTab, children }) { System Status}> {(activeTab === 'system_status') ? children : null} + Celery Status}> + {(activeTab === 'tasks') ? children : null} + RQ Status}> {(activeTab === 'jobs') ? children : null} diff --git a/client/app/components/admin/RQStatus.jsx b/client/app/components/admin/RQStatus.jsx index ffc14a679a..88c937dd17 100644 --- a/client/app/components/admin/RQStatus.jsx +++ b/client/app/components/admin/RQStatus.jsx @@ -3,34 +3,9 @@ import React from 'react'; import PropTypes from 'prop-types'; import Badge from 'antd/lib/badge'; -import Card from 'antd/lib/card'; -import Spin from 'antd/lib/spin'; import Table from 'antd/lib/table'; import { Columns } from '@/components/items-list/components/ItemsTable'; -// CounterCard - -export function CounterCard({ title, value, loading }) { - return ( - - - {title} -
{value}
-
-
- ); -} - -CounterCard.propTypes = { - title: PropTypes.string.isRequired, - value: PropTypes.oneOfType([PropTypes.number, PropTypes.string]), - loading: PropTypes.bool.isRequired, -}; - -CounterCard.defaultProps = { - value: '', -}; - // Tables const otherJobsColumns = [ diff --git a/client/app/pages/admin/Jobs.jsx b/client/app/pages/admin/Jobs.jsx index 60fa0a9a60..f04efc39bc 100644 --- a/client/app/pages/admin/Jobs.jsx +++ b/client/app/pages/admin/Jobs.jsx @@ -6,7 +6,8 @@ import Alert from 'antd/lib/alert'; import Tabs from 'antd/lib/tabs'; import * as Grid from 'antd/lib/grid'; import Layout from '@/components/admin/Layout'; -import { CounterCard, WorkersTable, QueuesTable, OtherJobsTable } from '@/components/admin/RQStatus'; +import { CounterCard } from '@/components/admin/CeleryStatus'; +import { WorkersTable, QueuesTable, OtherJobsTable } from '@/components/admin/RQStatus'; import { $http, $location, $rootScope } from '@/services/ng'; import recordEvent from '@/services/recordEvent'; diff --git a/client/app/pages/admin/Tasks.jsx b/client/app/pages/admin/Tasks.jsx new file mode 100644 index 0000000000..566d5c94a3 --- /dev/null +++ b/client/app/pages/admin/Tasks.jsx @@ -0,0 +1,135 @@ +import { values, each } from 'lodash'; +import moment from 'moment'; +import React from 'react'; +import { react2angular } from 'react2angular'; + +import Alert from 'antd/lib/alert'; +import Tabs from 'antd/lib/tabs'; +import * as Grid from 'antd/lib/grid'; +import Layout from '@/components/admin/Layout'; +import { CounterCard, QueuesTable, QueriesTable } from '@/components/admin/CeleryStatus'; + +import { $http } from '@/services/ng'; +import recordEvent from '@/services/recordEvent'; +import { routesToAngularRoutes } from '@/lib/utils'; + +// Converting name coming from API to the one the UI expects. +// TODO: update the UI components to use `waiting_in_queue` instead of `waiting`. +function stateName(state) { + if (state === 'waiting_in_queue') { + return 'waiting'; + } + return state; +} + +class Tasks extends React.Component { + state = { + isLoading: true, + error: null, + + queues: [], + queries: [], + counters: { active: 0, reserved: 0, waiting: 0 }, + }; + + componentDidMount() { + recordEvent('view', 'page', 'admin/tasks'); + $http + .get('/api/admin/queries/tasks') + .then(({ data }) => this.processTasks(data.tasks)) + .catch(error => this.handleError(error)); + } + + componentWillUnmount() { + // Ignore data after component unmounted + this.processTasks = () => {}; + this.handleError = () => {}; + } + + processTasks = (tasks) => { + const queues = {}; + const queries = []; + + const counters = { active: 0, reserved: 0, waiting: 0 }; + + each(tasks, (task) => { + task.state = stateName(task.state); + queues[task.queue] = queues[task.queue] || { name: task.queue, active: 0, reserved: 0, waiting: 0 }; + queues[task.queue][task.state] += 1; + + if (task.enqueue_time) { + task.enqueue_time = moment(task.enqueue_time * 1000.0); + } + if (task.start_time) { + task.start_time = moment(task.start_time * 1000.0); + } + + counters[task.state] += 1; + + if (task.task_name === 'redash.tasks.execute_query') { + queries.push(task); + } + }); + + this.setState({ isLoading: false, queues: values(queues), queries, counters }); + }; + + handleError = (error) => { + this.setState({ isLoading: false, error }); + }; + + render() { + const { isLoading, error, queues, queries, counters } = this.state; + + return ( + +
+ {error && ( + + )} + + {!error && ( + + + + + + + + + + + + + + + + + + + + + + + )} +
+
+ ); + } +} + +export default function init(ngModule) { + ngModule.component('pageTasks', react2angular(Tasks)); + + return routesToAngularRoutes([ + { + path: '/admin/queries/tasks', + title: 'Celery Status', + key: 'tasks', + }, + ], { + template: '', + }); +} + +init.init = true; diff --git a/redash/handlers/admin.py b/redash/handlers/admin.py index 62eeaf1309..ba8dd30ec9 100644 --- a/redash/handlers/admin.py +++ b/redash/handlers/admin.py @@ -8,7 +8,7 @@ from redash.permissions import require_super_admin from redash.serializers import QuerySerializer from redash.utils import json_loads -from redash.monitor import rq_status +from redash.monitor import celery_tasks, rq_status @routes.route('/api/admin/queries/outdated', methods=['GET']) @@ -38,6 +38,22 @@ def outdated_queries(): return json_response(response) +@routes.route('/api/admin/queries/tasks', methods=['GET']) +@require_super_admin +@login_required +def queries_tasks(): + record_event(current_org, current_user._get_current_object(), { + 'action': 'list', + 'object_type': 'celery_tasks' + }) + + response = { + 'tasks': celery_tasks(), + } + + return json_response(response) + + @routes.route('/api/admin/queries/rq_status', methods=['GET']) @require_super_admin @login_required diff --git a/redash/monitor.py b/redash/monitor.py index b062db295d..cd9471dff0 100644 --- a/redash/monitor.py +++ b/redash/monitor.py @@ -4,6 +4,7 @@ from redash import redis_connection, rq_redis_connection, __version__, settings from redash.models import db, DataSource, Query, QueryResult, Dashboard, Widget from redash.utils import json_loads +from redash.worker import celery from rq import Queue, Worker from rq.job import Job from rq.registry import StartedJobRegistry @@ -25,8 +26,17 @@ def get_object_counts(): return status +def get_celery_queues(): + queue_names = db.session.query(DataSource.queue_name).distinct() + scheduled_queue_names = db.session.query(DataSource.scheduled_queue_name).distinct() + query = db.session.execute(union_all(queue_names, scheduled_queue_names)) + + return ['celery'] + [row[0] for row in query] + + def get_queues_status(): - return {queue.name: {'size': len(queue)} for queue in Queue.all(connection=rq_redis_connection)} + return {**{queue: {'size': redis_connection.llen(queue)} for queue in get_celery_queues()}, + **{queue.name: {'size': len(queue)} for queue in Queue.all(connection=rq_redis_connection)}} def get_db_sizes(): @@ -57,6 +67,73 @@ def get_status(): return status +def get_waiting_in_queue(queue_name): + jobs = [] + for raw in redis_connection.lrange(queue_name, 0, -1): + job = json_loads(raw) + try: + args = json_loads(job['headers']['argsrepr']) + if args.get('query_id') == 'adhoc': + args['query_id'] = None + except ValueError: + args = {} + + job_row = { + 'state': 'waiting_in_queue', + 'task_name': job['headers']['task'], + 'worker': None, + 'worker_pid': None, + 'start_time': None, + 'task_id': job['headers']['id'], + 'queue': job['properties']['delivery_info']['routing_key'] + } + + job_row.update(args) + jobs.append(job_row) + + return jobs + + +def parse_tasks(task_lists, state): + rows = [] + + for task in itertools.chain(*task_lists.values()): + task_row = { + 'state': state, + 'task_name': task['name'], + 'worker': task['hostname'], + 'queue': task['delivery_info']['routing_key'], + 'task_id': task['id'], + 'worker_pid': task['worker_pid'], + 'start_time': task['time_start'], + } + + if task['name'] == 'redash.tasks.execute_query': + try: + args = json_loads(task['args']) + except ValueError: + args = {} 
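For illustration, each row assembled here (and returned by celery_tasks() through the /api/admin/queries/tasks endpoint) is a flat dict; an active execute_query task would look roughly like the sketch below, where every value is a made-up example rather than real output:

    # Illustrative shape only -- keys come from parse_tasks()/get_waiting_in_queue(),
    # all values are invented examples.
    example_task_row = {
        'state': 'active',
        'task_name': 'redash.tasks.execute_query',
        'worker': 'celery@worker-1',   # hostname reported by Celery inspect (example)
        'worker_pid': 12345,           # example
        'queue': 'queries',            # example
        'task_id': '5d3c1a2e-ffcc-4c5d-9df4-000000000000',  # example
        'start_time': 1575500000.0,    # epoch seconds (example)
        'query_id': 42,                # merged in from the task args (example)
    }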
+ + if args.get('query_id') == 'adhoc': + args['query_id'] = None + + task_row.update(args) + + rows.append(task_row) + + return rows + + +def celery_tasks(): + tasks = parse_tasks(celery.control.inspect().active(), 'active') + tasks += parse_tasks(celery.control.inspect().reserved(), 'reserved') + + for queue_name in get_celery_queues(): + tasks += get_waiting_in_queue(queue_name) + + return tasks + + def fetch_jobs(queue, job_ids): return [{ 'id': job.id, From 045cb9641de20e3fe399b0a3cbed9e07c93d512f Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 4 Dec 2019 21:47:11 +0000 Subject: [PATCH 17/24] reduce occurences of the word 'task' --- redash/tasks/queries/execution.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 6101f44c3e..585b7b0893 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -150,7 +150,7 @@ def run(self): run_time = time.time() - started_at - logger.info("task=execute_query query_hash=%s data_length=%s error=[%s]", + logger.info("job=execute_query query_hash=%s data_length=%s error=[%s]", self.query_hash, data and len(data), error) _unlock(self.query_hash, self.data_source.id) @@ -185,7 +185,7 @@ def run(self): return result def _annotate_query(self, query_runner): - self.metadata['Task ID'] = self.job.id + self.metadata['Job ID'] = self.job.id self.metadata['Query Hash'] = self.query_hash self.metadata['Scheduled'] = self.scheduled_query is not None @@ -193,8 +193,8 @@ def _annotate_query(self, query_runner): def _log_progress(self, state): logger.info( - "task=execute_query state=%s query_hash=%s type=%s ds_id=%d " - "task_id=%s queue=%s query_id=%s username=%s", + "job=execute_query state=%s query_hash=%s type=%s ds_id=%d " + "job_id=%s queue=%s query_id=%s username=%s", state, self.query_hash, self.data_source.type, self.data_source.id, self.job.id, self.metadata.get('Queue', 'unknown'), @@ -202,7 +202,7 @@ def _log_progress(self, state): self.metadata.get('Username', 'unknown')) def _load_data_source(self): - logger.info("task=execute_query state=load_ds ds_id=%d", self.data_source_id) + logger.info("job=execute_query state=load_ds ds_id=%d", self.data_source_id) return models.DataSource.query.get(self.data_source_id) From d486cbb337f566638a99b9e6fe411811df46e430 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Wed, 4 Dec 2019 22:26:35 +0000 Subject: [PATCH 18/24] use Worker, Queue and Job instead of spreading names that share behavior details --- redash/cli/rq.py | 2 +- redash/handlers/query_results.py | 5 +++-- redash/serializers/__init__.py | 6 ++---- redash/tasks/__init__.py | 2 +- redash/tasks/queries/execution.py | 7 +++---- .../tasks/{hard_limiting_worker.py => worker.py} | 14 +++++++++----- tests/tasks/test_queries.py | 8 ++++---- 7 files changed, 23 insertions(+), 21 deletions(-) rename redash/tasks/{hard_limiting_worker.py => worker.py} (93%) diff --git a/redash/cli/rq.py b/redash/cli/rq.py index ebf9cc5604..5ec8ab10c3 100644 --- a/redash/cli/rq.py +++ b/redash/cli/rq.py @@ -9,7 +9,7 @@ from sqlalchemy.orm import configure_mappers from redash import rq_redis_connection -from redash.tasks import HardLimitingWorker as Worker, rq_scheduler, schedule_periodic_jobs, periodic_job_definitions +from redash.tasks import Worker, rq_scheduler, schedule_periodic_jobs, periodic_job_definitions manager = AppGroup(help="RQ management commands.") diff --git a/redash/handlers/query_results.py 
b/redash/handlers/query_results.py index fa2d060b37..7d8ff374d9 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -8,6 +8,7 @@ from redash.handlers.base import BaseResource, get_object_or_404, record_event from redash.permissions import (has_access, not_view_only, require_access, require_permission, view_only) +from redash.tasks import Job from redash.tasks.queries import enqueue_query from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow, to_filename) from redash.models.parameterized_query import (ParameterizedQuery, InvalidParameterError, @@ -317,12 +318,12 @@ def get(self, job_id, query_id=None): """ Retrieve info about a running query job. """ - job = CancellableJob.fetch(job_id, connection=rq_redis_connection) + job = Job.fetch(job_id, connection=rq_redis_connection) return serialize_job(job) def delete(self, job_id): """ Cancel a query job in progress. """ - job = CancellableJob.fetch(job_id, connection=rq_redis_connection) + job = Job.fetch(job_id, connection=rq_redis_connection) job.cancel() diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index 7f92d7540a..b52c334fe9 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -243,7 +243,7 @@ def serialize_job(job): } job_status = job.get_status() - if job_status == JobStatus.STARTED: + if job.is_started: updated_at = job.started_at or 0 else: updated_at = 0 @@ -258,14 +258,12 @@ def serialize_job(job): else: error = '' - successfully_finished = job_status == JobStatus.FINISHED and not error - return { 'job': { 'id': job.id, 'updated_at': updated_at, 'status': status, 'error': error, - 'query_result_id': result if successfully_finished else None + 'query_result_id': result if job.is_finished and not error else None } } diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index b1ed19c336..ad93b759cd 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -3,5 +3,5 @@ refresh_schemas, cleanup_query_results, empty_schedules) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors -from .hard_limiting_worker import HardLimitingWorker, CancellableQueue, CancellableJob +from .worker import Worker, Queue, Job from .schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 585b7b0893..4db8fc755a 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -5,12 +5,11 @@ from six import text_type from rq import get_current_job -from rq.job import Job from rq.timeouts import JobTimeoutException from redash import models, rq_redis_connection, redis_connection, settings from redash.query_runner import InterruptException -from redash.tasks.hard_limiting_worker import CancellableQueue, CancellableJob +from redash.tasks.worker import Queue, Job from redash.tasks.alerts import check_alerts_for_query from redash.tasks.failure_report import track_failure from redash.utils import gen_query_hash, json_dumps, utcnow @@ -44,7 +43,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query if job_id: logging.info("[%s] Found existing job: %s", query_hash, job_id) - job = CancellableJob.fetch(job_id, connection=rq_redis_connection) + job = Job.fetch(job_id, connection=rq_redis_connection) if job.is_finished: logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.rq_status) @@ 
-64,7 +63,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query time_limit = settings.dynamic_settings.query_time_limit(scheduled_query, user_id, data_source.org_id) metadata['Queue'] = queue_name - queue = CancellableQueue(queue_name, connection=rq_redis_connection) + queue = Queue(queue_name, connection=rq_redis_connection) job = queue.enqueue(execute_query, query, data_source.id, metadata, user_id=user_id, scheduled_query_id=scheduled_query_id, diff --git a/redash/tasks/hard_limiting_worker.py b/redash/tasks/worker.py similarity index 93% rename from redash/tasks/hard_limiting_worker.py rename to redash/tasks/worker.py index b037c743fd..425d10b1ea 100644 --- a/redash/tasks/hard_limiting_worker.py +++ b/redash/tasks/worker.py @@ -2,13 +2,13 @@ import os import signal import time -from rq import Worker, Queue, get_current_job +from rq import Worker as BaseWorker, Queue as BaseQueue, get_current_job from rq.utils import utcnow from rq.timeouts import UnixSignalDeathPenalty, HorseMonitorTimeoutException -from rq.job import Job, JobStatus +from rq.job import Job as BaseJob, JobStatus -class CancellableJob(Job): +class CancellableJob(BaseJob): def cancel(self, pipeline=None): # TODO - add tests that verify that queued jobs are removed from queue and running jobs are actively cancelled if self.is_started: @@ -22,11 +22,11 @@ def is_cancelled(self): return self.meta.get('cancelled', False) -class CancellableQueue(Queue): +class CancellableQueue(BaseQueue): job_class = CancellableJob -class HardLimitingWorker(Worker): +class HardLimitingWorker(BaseWorker): """ RQ's work horses enforce time limits by setting a timed alarm and stopping jobs when they reach their time limits. However, the work horse may be entirely blocked @@ -113,3 +113,7 @@ def monitor_work_horse(self, job): exc_string="Work-horse process was terminated unexpectedly " "(waitpid returned %s)" % ret_val ) + +Job = CancellableJob +Queue = CancellableQueue +Worker = HardLimitingWorker \ No newline at end of file diff --git a/tests/tasks/test_queries.py b/tests/tasks/test_queries.py index 007c695e7b..709411b53f 100644 --- a/tests/tasks/test_queries.py +++ b/tests/tasks/test_queries.py @@ -10,7 +10,7 @@ from redash.utils import json_dumps from redash.query_runner.pg import PostgreSQL from redash.tasks.queries.execution import QueryExecutionError, enqueue_query, execute_query -from redash.tasks import CancellableJob +from redash.tasks import Job FakeResult = namedtuple('FakeResult', 'id is_finished') @@ -26,11 +26,11 @@ def fetch_job(*args, **kwargs): def create_job(*args, **kwargs): - return CancellableJob(connection=rq_redis_connection) + return Job(connection=rq_redis_connection) -@patch('redash.tasks.queries.execution.CancellableJob.fetch', side_effect=fetch_job) -@patch('redash.tasks.queries.execution.CancellableQueue.enqueue', side_effect=create_job) +@patch('redash.tasks.queries.execution.Job.fetch', side_effect=fetch_job) +@patch('redash.tasks.queries.execution.Queue.enqueue', side_effect=create_job) class TestEnqueueTask(BaseTestCase): def test_multiple_enqueue_of_same_query(self, enqueue, _): query = self.factory.create_query() From 8b1a471148a2b28c6782cf4cdce46e8b10c75f2a Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Thu, 5 Dec 2019 08:10:54 +0000 Subject: [PATCH 19/24] remove locks for failed jobs as well --- redash/tasks/queries/execution.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 
4db8fc755a..9f57457101 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -5,6 +5,7 @@ from six import text_type from rq import get_current_job +from rq.job import JobStatus from rq.timeouts import JobTimeoutException from redash import models, rq_redis_connection, redis_connection, settings @@ -45,8 +46,9 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query job = Job.fetch(job_id, connection=rq_redis_connection) - if job.is_finished: - logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.rq_status) + status = job.get_status() + if status in [JobStatus.FINISHED, JobStatus.FAILED] + logging.info("[%s] job found is ready (%s), removing lock", query_hash, status) redis_connection.delete(_job_lock_id(query_hash, data_source.id)) job = None From 75bbbf8d8d051d24f8a7eeb80dc0f390e5479128 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Thu, 5 Dec 2019 19:44:42 +0000 Subject: [PATCH 20/24] did I not commit that colon? oh my --- redash/tasks/queries/execution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 9f57457101..05b332323f 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -47,7 +47,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query job = Job.fetch(job_id, connection=rq_redis_connection) status = job.get_status() - if status in [JobStatus.FINISHED, JobStatus.FAILED] + if status in [JobStatus.FINISHED, JobStatus.FAILED]: logging.info("[%s] job found is ready (%s), removing lock", query_hash, status) redis_connection.delete(_job_lock_id(query_hash, data_source.id)) job = None From 063708095a6968b565b1d03cdd4688c153506985 Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Thu, 5 Dec 2019 20:34:17 +0000 Subject: [PATCH 21/24] push the redis connection to RQ's stack on every request to avoid verbose connection setting --- redash/app.py | 3 ++- redash/handlers/query_results.py | 6 +++--- redash/monitor.py | 6 +++--- redash/serializers/__init__.py | 2 +- redash/tasks/__init__.py | 8 ++++++++ redash/tasks/queries/execution.py | 6 +++--- 6 files changed, 20 insertions(+), 11 deletions(-) diff --git a/redash/app.py b/redash/app.py index 8bca2de5f1..31f557c5a7 100644 --- a/redash/app.py +++ b/redash/app.py @@ -20,7 +20,7 @@ def __init__(self, *args, **kwargs): def create_app(): - from . import authentication, extensions, handlers, limiter, mail, migrate, security + from . 
import authentication, extensions, handlers, limiter, mail, migrate, security, tasks from .handlers.webpack import configure_webpack from .metrics import request as request_metrics from .models import db, users @@ -44,5 +44,6 @@ def create_app(): configure_webpack(app) extensions.init_app(app) users.init_app(app) + tasks.init_app(app) return app diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index 7d8ff374d9..2060a55975 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -4,7 +4,7 @@ from flask import make_response, request from flask_login import current_user from flask_restful import abort -from redash import models, settings, rq_redis_connection +from redash import models, settings from redash.handlers.base import BaseResource, get_object_or_404, record_event from redash.permissions import (has_access, not_view_only, require_access, require_permission, view_only) @@ -318,12 +318,12 @@ def get(self, job_id, query_id=None): """ Retrieve info about a running query job. """ - job = Job.fetch(job_id, connection=rq_redis_connection) + job = Job.fetch(job_id) return serialize_job(job) def delete(self, job_id): """ Cancel a query job in progress. """ - job = Job.fetch(job_id, connection=rq_redis_connection) + job = Job.fetch(job_id) job.cancel() diff --git a/redash/monitor.py b/redash/monitor.py index cd9471dff0..a8f80cfdbc 100644 --- a/redash/monitor.py +++ b/redash/monitor.py @@ -36,7 +36,7 @@ def get_celery_queues(): def get_queues_status(): return {**{queue: {'size': redis_connection.llen(queue)} for queue in get_celery_queues()}, - **{queue.name: {'size': len(queue)} for queue in Queue.all(connection=rq_redis_connection)}} + **{queue.name: {'size': len(queue)} for queue in Queue.all()}} def get_db_sizes(): @@ -150,7 +150,7 @@ def rq_queues(): 'name': q.name, 'started': fetch_jobs(q, StartedJobRegistry(queue=q).get_job_ids()), 'queued': len(q.job_ids) - } for q in Queue.all(connection=rq_redis_connection)} + } for q in Queue.all()} def describe_job(job): @@ -170,7 +170,7 @@ def rq_workers(): 'successful_jobs': w.successful_job_count, 'failed_jobs': w.failed_job_count, 'total_working_time': w.total_working_time - } for w in Worker.all(connection=rq_redis_connection)] + } for w in Worker.all()] def rq_status(): diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index b52c334fe9..9fe75ea2c3 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -264,6 +264,6 @@ def serialize_job(job): 'updated_at': updated_at, 'status': status, 'error': error, - 'query_result_id': result if job.is_finished and not error else None + 'query_result_id': job.result if job.is_finished and not error else None } } diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index ad93b759cd..c7e314d7a1 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -5,3 +5,11 @@ from .failure_report import send_aggregated_errors from .worker import Worker, Queue, Job from .schedule import rq_scheduler, schedule_periodic_jobs, periodic_job_definitions + +from redash import rq_redis_connection +from rq.connections import push_connection, pop_connection + +def init_app(app): + app.before_request(lambda: push_connection(rq_redis_connection)) + app.teardown_request(lambda _: pop_connection()) + diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 05b332323f..8242f1f8af 100644 --- a/redash/tasks/queries/execution.py +++ 
b/redash/tasks/queries/execution.py @@ -8,7 +8,7 @@ from rq.job import JobStatus from rq.timeouts import JobTimeoutException -from redash import models, rq_redis_connection, redis_connection, settings +from redash import models, redis_connection, settings from redash.query_runner import InterruptException from redash.tasks.worker import Queue, Job from redash.tasks.alerts import check_alerts_for_query @@ -44,7 +44,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query if job_id: logging.info("[%s] Found existing job: %s", query_hash, job_id) - job = Job.fetch(job_id, connection=rq_redis_connection) + job = Job.fetch(job_id) status = job.get_status() if status in [JobStatus.FINISHED, JobStatus.FAILED]: @@ -65,7 +65,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query time_limit = settings.dynamic_settings.query_time_limit(scheduled_query, user_id, data_source.org_id) metadata['Queue'] = queue_name - queue = Queue(queue_name, connection=rq_redis_connection) + queue = Queue(queue_name) job = queue.enqueue(execute_query, query, data_source.id, metadata, user_id=user_id, scheduled_query_id=scheduled_query_id, From f086b8171b251202e5a47243cc0e114ce404c58f Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Mon, 9 Dec 2019 08:56:53 +0000 Subject: [PATCH 22/24] use a connection context for tests --- tests/tasks/test_queries.py | 43 ++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/tasks/test_queries.py b/tests/tasks/test_queries.py index 709411b53f..f0e9d176cc 100644 --- a/tests/tasks/test_queries.py +++ b/tests/tasks/test_queries.py @@ -1,9 +1,9 @@ from unittest import TestCase -from collections import namedtuple import uuid -import mock -from mock import patch +from mock import patch, Mock + +from rq import Connection from tests import BaseTestCase from redash import redis_connection, rq_redis_connection, models @@ -13,16 +13,16 @@ from redash.tasks import Job -FakeResult = namedtuple('FakeResult', 'id is_finished') - - def fetch_job(*args, **kwargs): if any(args): job_id = args[0] if isinstance(args[0], str) else args[0].id else: job_id = create_job().id - return FakeResult(job_id, False) + result = Mock() + result.id = job_id + + return result def create_job(*args, **kwargs): @@ -35,17 +35,19 @@ class TestEnqueueTask(BaseTestCase): def test_multiple_enqueue_of_same_query(self, enqueue, _): query = self.factory.create_query() - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) + with Connection(rq_redis_connection): + enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) + enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) + enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) self.assertEqual(1, enqueue.call_count) - @mock.patch('redash.settings.dynamic_settings.query_time_limit', return_value=60) + @patch('redash.settings.dynamic_settings.query_time_limit', return_value=60) def test_limits_query_time(self, _, enqueue, __): query = 
self.factory.create_query() - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) + with Connection(rq_redis_connection): + enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) _, kwargs = enqueue.call_args self.assertEqual(60, kwargs.get('job_timeout')) @@ -53,9 +55,10 @@ def test_limits_query_time(self, _, enqueue, __): def test_multiple_enqueue_of_different_query(self, enqueue, _): query = self.factory.create_query() - enqueue_query(query.query_text, query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text + '2', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text + '3', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) + with Connection(rq_redis_connection): + enqueue_query(query.query_text, query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) + enqueue_query(query.query_text + '2', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) + enqueue_query(query.query_text + '3', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) self.assertEqual(3, enqueue.call_count) @@ -66,7 +69,7 @@ def test_success(self, _): """ ``execute_query`` invokes the query runner and stores a query result. """ - with mock.patch.object(PostgreSQL, "run_query") as qr: + with patch.object(PostgreSQL, "run_query") as qr: query_result_data = {"columns": [], "rows": []} qr.return_value = (json_dumps(query_result_data), None) result_id = execute_query("SELECT 1, 2", self.factory.data_source.id, {}) @@ -79,7 +82,7 @@ def test_success_scheduled(self, _): Scheduled queries remember their latest results. """ q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) - with mock.patch.object(PostgreSQL, "run_query") as qr: + with patch.object(PostgreSQL, "run_query") as qr: qr.return_value = ([1, 2], None) result_id = execute_query( "SELECT 1, 2", @@ -95,7 +98,7 @@ def test_failure_scheduled(self, _): Scheduled queries that fail have their failure recorded. """ q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) - with mock.patch.object(PostgreSQL, "run_query") as qr: + with patch.object(PostgreSQL, "run_query") as qr: qr.side_effect = ValueError("broken") result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, @@ -115,7 +118,7 @@ def test_success_after_failure(self, _): Query execution success resets the failure counter. 
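These tests lean on the same connection stack that tasks.init_app() now pushes to on every request (push_connection/pop_connection): inside a Connection block RQ resolves the active Redis connection implicitly, so Queue() and Job.fetch() need no explicit connection argument. A minimal sketch, assuming redash's rq_redis_connection:

    from rq import Connection, Queue
    from redash import rq_redis_connection

    with Connection(rq_redis_connection):
        queue = Queue('queries')       # connection resolved from the stack
        print(queue.name, len(queue))  # queued job count, no connection= needed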
""" q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) - with mock.patch.object(PostgreSQL, "run_query") as qr: + with patch.object(PostgreSQL, "run_query") as qr: qr.side_effect = ValueError("broken") result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, @@ -124,7 +127,7 @@ def test_success_after_failure(self, _): q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 1) - with mock.patch.object(PostgreSQL, "run_query") as qr: + with patch.object(PostgreSQL, "run_query") as qr: qr.return_value = ([1, 2], None) execute_query("SELECT 1, 2", self.factory.data_source.id, {}, From fd3417b61cca66a8588b7cdc48d11ee0ee20468a Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Mon, 30 Dec 2019 10:29:30 +0000 Subject: [PATCH 23/24] black it up --- redash/app.py | 26 ++- redash/cli/rq.py | 15 +- redash/handlers/query_results.py | 286 ++++++++++++++++++++---------- redash/monitor.py | 156 ++++++++-------- redash/serializers/__init__.py | 275 +++++++++++++++------------- redash/tasks/__init__.py | 19 +- redash/tasks/queries/__init__.py | 7 +- redash/tasks/queries/execution.py | 113 ++++++++---- redash/tasks/schedule.py | 62 +++++-- redash/tasks/worker.py | 37 ++-- tests/tasks/test_queries.py | 122 +++++++++---- tests/tasks/test_schedule.py | 21 ++- 12 files changed, 731 insertions(+), 408 deletions(-) diff --git a/redash/app.py b/redash/app.py index 31f557c5a7..1c592b3e70 100644 --- a/redash/app.py +++ b/redash/app.py @@ -6,21 +6,33 @@ class Redash(Flask): """A custom Flask app for Redash""" + def __init__(self, *args, **kwargs): - kwargs.update({ - 'template_folder': settings.STATIC_ASSETS_PATH, - 'static_folder': settings.STATIC_ASSETS_PATH, - 'static_url_path': '/static', - }) + kwargs.update( + { + "template_folder": settings.STATIC_ASSETS_PATH, + "static_folder": settings.STATIC_ASSETS_PATH, + "static_url_path": "/static", + } + ) super(Redash, self).__init__(__name__, *args, **kwargs) # Make sure we get the right referral address even behind proxies like nginx. self.wsgi_app = ProxyFix(self.wsgi_app, settings.PROXIES_COUNT) # Configure Redash using our settings - self.config.from_object('redash.settings') + self.config.from_object("redash.settings") def create_app(): - from . import authentication, extensions, handlers, limiter, mail, migrate, security, tasks + from . 
import ( + authentication, + extensions, + handlers, + limiter, + mail, + migrate, + security, + tasks, + ) from .handlers.webpack import configure_webpack from .metrics import request as request_metrics from .models import db, users diff --git a/redash/cli/rq.py b/redash/cli/rq.py index 5ec8ab10c3..8538bf3359 100644 --- a/redash/cli/rq.py +++ b/redash/cli/rq.py @@ -9,7 +9,12 @@ from sqlalchemy.orm import configure_mappers from redash import rq_redis_connection -from redash.tasks import Worker, rq_scheduler, schedule_periodic_jobs, periodic_job_definitions +from redash.tasks import ( + Worker, + rq_scheduler, + schedule_periodic_jobs, + periodic_job_definitions, +) manager = AppGroup(help="RQ management commands.") @@ -22,15 +27,15 @@ def scheduler(): @manager.command() -@argument('queues', nargs=-1) +@argument("queues", nargs=-1) def worker(queues): - # Configure any SQLAlchemy mappers loaded until now so that the mapping configuration - # will already be available to the forked work horses and they won't need + # Configure any SQLAlchemy mappers loaded until now so that the mapping configuration + # will already be available to the forked work horses and they won't need # to spend valuable time re-doing that on every fork. configure_mappers() if not queues: - queues = ['queries', 'periodic', 'emails', 'default', 'schemas'] + queues = ["queries", "periodic", "emails", "default", "schemas"] with Connection(rq_redis_connection): w = Worker(queues, log_job_description=False, job_monitoring_interval=5) diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index 2060a55975..82b2ba3355 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -6,35 +6,66 @@ from flask_restful import abort from redash import models, settings from redash.handlers.base import BaseResource, get_object_or_404, record_event -from redash.permissions import (has_access, not_view_only, require_access, - require_permission, view_only) +from redash.permissions import ( + has_access, + not_view_only, + require_access, + require_permission, + view_only, +) from redash.tasks import Job from redash.tasks.queries import enqueue_query -from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow, to_filename) -from redash.models.parameterized_query import (ParameterizedQuery, InvalidParameterError, - QueryDetachedFromDataSourceError, dropdown_values) -from redash.serializers import (serialize_query_result, serialize_query_result_to_csv, - serialize_query_result_to_xlsx, serialize_job) +from redash.utils import ( + collect_parameters_from_request, + gen_query_hash, + json_dumps, + utcnow, + to_filename, +) +from redash.models.parameterized_query import ( + ParameterizedQuery, + InvalidParameterError, + QueryDetachedFromDataSourceError, + dropdown_values, +) +from redash.serializers import ( + serialize_query_result, + serialize_query_result_to_csv, + serialize_query_result_to_xlsx, + serialize_job, +) def error_response(message, http_status=400): - return {'job': {'status': 4, 'error': message}}, http_status + return {"job": {"status": 4, "error": message}}, http_status error_messages = { - 'unsafe_when_shared': error_response('This query contains potentially unsafe parameters and cannot be executed on a shared dashboard or an embedded visualization.', 403), - 'unsafe_on_view_only': error_response('This query contains potentially unsafe parameters and cannot be executed with read-only access to this data source.', 403), - 'no_permission': 
error_response('You do not have permission to run queries with this data source.', 403), - 'select_data_source': error_response('Please select data source to run this query.', 401) + "unsafe_when_shared": error_response( + "This query contains potentially unsafe parameters and cannot be executed on a shared dashboard or an embedded visualization.", + 403, + ), + "unsafe_on_view_only": error_response( + "This query contains potentially unsafe parameters and cannot be executed with read-only access to this data source.", + 403, + ), + "no_permission": error_response( + "You do not have permission to run queries with this data source.", 403 + ), + "select_data_source": error_response( + "Please select data source to run this query.", 401 + ), } def run_query(query, parameters, data_source, query_id, max_age=0): if data_source.paused: if data_source.pause_reason: - message = '{} is paused ({}). Please try later.'.format(data_source.name, data_source.pause_reason) + message = "{} is paused ({}). Please try later.".format( + data_source.name, data_source.pause_reason + ) else: - message = '{} is paused. Please try later.'.format(data_source.name) + message = "{} is paused. Please try later.".format(data_source.name) return error_response(message) @@ -44,44 +75,62 @@ def run_query(query, parameters, data_source, query_id, max_age=0): abort(400, message=e.message) if query.missing_params: - return error_response('Missing parameter value for: {}'.format(", ".join(query.missing_params))) + return error_response( + "Missing parameter value for: {}".format(", ".join(query.missing_params)) + ) if max_age == 0: query_result = None else: query_result = models.QueryResult.get_latest(data_source, query.text, max_age) - record_event(current_user.org, current_user, { - 'action': 'execute_query', - 'cache': 'hit' if query_result else 'miss', - 'object_id': data_source.id, - 'object_type': 'data_source', - 'query': query.text, - 'query_id': query_id, - 'parameters': parameters - }) + record_event( + current_user.org, + current_user, + { + "action": "execute_query", + "cache": "hit" if query_result else "miss", + "object_id": data_source.id, + "object_type": "data_source", + "query": query.text, + "query_id": query_id, + "parameters": parameters, + }, + ) if query_result: - return {'query_result': serialize_query_result(query_result, current_user.is_api_user())} + return { + "query_result": serialize_query_result( + query_result, current_user.is_api_user() + ) + } else: - job = enqueue_query(query.text, data_source, current_user.id, current_user.is_api_user(), metadata={ - "Username": repr(current_user) if current_user.is_api_user() else current_user.email, - "Query ID": query_id - }) + job = enqueue_query( + query.text, + data_source, + current_user.id, + current_user.is_api_user(), + metadata={ + "Username": repr(current_user) + if current_user.is_api_user() + else current_user.email, + "Query ID": query_id, + }, + ) return serialize_job(job) def get_download_filename(query_result, query, filetype): retrieved_at = query_result.retrieved_at.strftime("%Y_%m_%d") if query: - filename = to_filename(query.name) if query.name != '' else str(query.id) + filename = to_filename(query.name) if query.name != "" else str(query.id) else: filename = str(query_result.id) return "{}_{}.{}".format(filename, retrieved_at, filetype) class QueryResultListResource(BaseResource): - @require_permission('execute_query') + @require_permission("execute_query") def post(self): """ Execute a query (or retrieve recent results). 
@@ -97,27 +146,33 @@ def post(self): """ params = request.get_json(force=True) - query = params['query'] - max_age = params.get('max_age', -1) + query = params["query"] + max_age = params.get("max_age", -1) # max_age might have the value of None, in which case calling int(None) will fail if max_age is None: max_age = -1 max_age = int(max_age) - query_id = params.get('query_id', 'adhoc') - parameters = params.get('parameters', collect_parameters_from_request(request.args)) + query_id = params.get("query_id", "adhoc") + parameters = params.get( + "parameters", collect_parameters_from_request(request.args) + ) parameterized_query = ParameterizedQuery(query, org=self.current_org) - data_source_id = params.get('data_source_id') + data_source_id = params.get("data_source_id") if data_source_id: - data_source = models.DataSource.get_by_id_and_org(params.get('data_source_id'), self.current_org) + data_source = models.DataSource.get_by_id_and_org( + params.get("data_source_id"), self.current_org + ) else: - return error_messages['select_data_source'] + return error_messages["select_data_source"] if not has_access(data_source, self.current_user, not_view_only): - return error_messages['no_permission'] + return error_messages["no_permission"] - return run_query(parameterized_query, parameters, data_source, query_id, max_age) + return run_query( + parameterized_query, parameters, data_source, query_id, max_age + ) ONE_YEAR = 60 * 60 * 24 * 365.25 @@ -125,7 +180,9 @@ def post(self): class QueryResultDropdownResource(BaseResource): def get(self, query_id): - query = get_object_or_404(models.Query.get_by_id_and_org, query_id, self.current_org) + query = get_object_or_404( + models.Query.get_by_id_and_org, query_id, self.current_org + ) require_access(query.data_source, current_user, view_only) try: return dropdown_values(query_id, self.current_org) @@ -135,12 +192,18 @@ def get(self, query_id): class QueryDropdownsResource(BaseResource): def get(self, query_id, dropdown_query_id): - query = get_object_or_404(models.Query.get_by_id_and_org, query_id, self.current_org) + query = get_object_or_404( + models.Query.get_by_id_and_org, query_id, self.current_org + ) require_access(query, current_user, view_only) - related_queries_ids = [p['queryId'] for p in query.parameters if p['type'] == 'query'] + related_queries_ids = [ + p["queryId"] for p in query.parameters if p["type"] == "query" + ] if int(dropdown_query_id) not in related_queries_ids: - dropdown_query = get_object_or_404(models.Query.get_by_id_and_org, dropdown_query_id, self.current_org) + dropdown_query = get_object_or_404( + models.Query.get_by_id_and_org, dropdown_query_id, self.current_org + ) require_access(dropdown_query.data_source, current_user, view_only) return dropdown_values(dropdown_query_id, self.current_org) @@ -149,27 +212,33 @@ def get(self, query_id, dropdown_query_id): class QueryResultResource(BaseResource): @staticmethod def add_cors_headers(headers): - if 'Origin' in request.headers: - origin = request.headers['Origin'] + if "Origin" in request.headers: + origin = request.headers["Origin"] - if set(['*', origin]) & settings.ACCESS_CONTROL_ALLOW_ORIGIN: - headers['Access-Control-Allow-Origin'] = origin - headers['Access-Control-Allow-Credentials'] = str(settings.ACCESS_CONTROL_ALLOW_CREDENTIALS).lower() + if set(["*", origin]) & settings.ACCESS_CONTROL_ALLOW_ORIGIN: + headers["Access-Control-Allow-Origin"] = origin + headers["Access-Control-Allow-Credentials"] = str( + settings.ACCESS_CONTROL_ALLOW_CREDENTIALS + ).lower() - 
@require_permission('view_query') - def options(self, query_id=None, query_result_id=None, filetype='json'): + @require_permission("view_query") + def options(self, query_id=None, query_result_id=None, filetype="json"): headers = {} self.add_cors_headers(headers) if settings.ACCESS_CONTROL_REQUEST_METHOD: - headers['Access-Control-Request-Method'] = settings.ACCESS_CONTROL_REQUEST_METHOD + headers[ + "Access-Control-Request-Method" + ] = settings.ACCESS_CONTROL_REQUEST_METHOD if settings.ACCESS_CONTROL_ALLOW_HEADERS: - headers['Access-Control-Allow-Headers'] = settings.ACCESS_CONTROL_ALLOW_HEADERS + headers[ + "Access-Control-Allow-Headers" + ] = settings.ACCESS_CONTROL_ALLOW_HEADERS return make_response("", 200, headers) - @require_permission('view_query') + @require_permission("view_query") def post(self, query_id): """ Execute a saved query. @@ -182,31 +251,41 @@ def post(self, query_id): always execute. """ params = request.get_json(force=True, silent=True) or {} - parameter_values = params.get('parameters', {}) + parameter_values = params.get("parameters", {}) - max_age = params.get('max_age', -1) + max_age = params.get("max_age", -1) # max_age might have the value of None, in which case calling int(None) will fail if max_age is None: max_age = -1 max_age = int(max_age) - query = get_object_or_404(models.Query.get_by_id_and_org, query_id, self.current_org) + query = get_object_or_404( + models.Query.get_by_id_and_org, query_id, self.current_org + ) allow_executing_with_view_only_permissions = query.parameterized.is_safe - if has_access(query, self.current_user, allow_executing_with_view_only_permissions): - return run_query(query.parameterized, parameter_values, query.data_source, query_id, max_age) + if has_access( + query, self.current_user, allow_executing_with_view_only_permissions + ): + return run_query( + query.parameterized, + parameter_values, + query.data_source, + query_id, + max_age, + ) else: if not query.parameterized.is_safe: if current_user.is_api_user(): - return error_messages['unsafe_when_shared'] + return error_messages["unsafe_when_shared"] else: - return error_messages['unsafe_on_view_only'] + return error_messages["unsafe_on_view_only"] else: - return error_messages['no_permission'] + return error_messages["no_permission"] - @require_permission('view_query') - def get(self, query_id=None, query_result_id=None, filetype='json'): + @require_permission("view_query") + def get(self, query_id=None, query_result_id=None, filetype="json"): """ Retrieve query results. 
@@ -229,52 +308,66 @@ def get(self, query_id=None, query_result_id=None, filetype='json'): should_cache = query_result_id is not None parameter_values = collect_parameters_from_request(request.args) - max_age = int(request.args.get('maxAge', 0)) + max_age = int(request.args.get("maxAge", 0)) query_result = None query = None if query_result_id: - query_result = get_object_or_404(models.QueryResult.get_by_id_and_org, query_result_id, self.current_org) + query_result = get_object_or_404( + models.QueryResult.get_by_id_and_org, query_result_id, self.current_org + ) if query_id is not None: - query = get_object_or_404(models.Query.get_by_id_and_org, query_id, self.current_org) - - if query_result is None and query is not None and query.latest_query_data_id is not None: - query_result = get_object_or_404(models.QueryResult.get_by_id_and_org, - query.latest_query_data_id, - self.current_org) + query = get_object_or_404( + models.Query.get_by_id_and_org, query_id, self.current_org + ) - if query is not None and query_result is not None and self.current_user.is_api_user(): + if ( + query_result is None + and query is not None + and query.latest_query_data_id is not None + ): + query_result = get_object_or_404( + models.QueryResult.get_by_id_and_org, + query.latest_query_data_id, + self.current_org, + ) + + if ( + query is not None + and query_result is not None + and self.current_user.is_api_user() + ): if query.query_hash != query_result.query_hash: - abort(404, message='No cached result found for this query.') + abort(404, message="No cached result found for this query.") if query_result: require_access(query_result.data_source, self.current_user, view_only) if isinstance(self.current_user, models.ApiUser): event = { - 'user_id': None, - 'org_id': self.current_org.id, - 'action': 'api_get', - 'api_key': self.current_user.name, - 'file_type': filetype, - 'user_agent': request.user_agent.string, - 'ip': request.remote_addr + "user_id": None, + "org_id": self.current_org.id, + "action": "api_get", + "api_key": self.current_user.name, + "file_type": filetype, + "user_agent": request.user_agent.string, + "ip": request.remote_addr, } if query_id: - event['object_type'] = 'query' - event['object_id'] = query_id + event["object_type"] = "query" + event["object_id"] = query_id else: - event['object_type'] = 'query_result' - event['object_id'] = query_result_id + event["object_type"] = "query_result" + event["object_id"] = query_result_id self.record_event(event) - if filetype == 'json': + if filetype == "json": response = self.make_json_response(query_result) - elif filetype == 'xlsx': + elif filetype == "xlsx": response = self.make_excel_response(query_result) else: response = self.make_csv_response(query_result) @@ -283,33 +376,36 @@ def get(self, query_id=None, query_result_id=None, filetype='json'): self.add_cors_headers(response.headers) if should_cache: - response.headers.add_header('Cache-Control', 'private,max-age=%d' % ONE_YEAR) + response.headers.add_header( + "Cache-Control", "private,max-age=%d" % ONE_YEAR + ) filename = get_download_filename(query_result, query, filetype) response.headers.add_header( - "Content-Disposition", - 'attachment; filename="{}"'.format(filename) + "Content-Disposition", 'attachment; filename="{}"'.format(filename) ) return response else: - abort(404, message='No cached result found for this query.') + abort(404, message="No cached result found for this query.") def make_json_response(self, query_result): - data = json_dumps({'query_result': query_result.to_dict()}) - 
headers = {'Content-Type': "application/json"} + data = json_dumps({"query_result": query_result.to_dict()}) + headers = {"Content-Type": "application/json"} return make_response(data, 200, headers) @staticmethod def make_csv_response(query_result): - headers = {'Content-Type': "text/csv; charset=UTF-8"} + headers = {"Content-Type": "text/csv; charset=UTF-8"} return make_response(serialize_query_result_to_csv(query_result), 200, headers) @staticmethod def make_excel_response(query_result): - headers = {'Content-Type': "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"} + headers = { + "Content-Type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + } return make_response(serialize_query_result_to_xlsx(query_result), 200, headers) diff --git a/redash/monitor.py b/redash/monitor.py index a8f80cfdbc..50dcdd95a9 100644 --- a/redash/monitor.py +++ b/redash/monitor.py @@ -12,17 +12,20 @@ def get_redis_status(): info = redis_connection.info() - return {'redis_used_memory': info['used_memory'], 'redis_used_memory_human': info['used_memory_human']} + return { + "redis_used_memory": info["used_memory"], + "redis_used_memory_human": info["used_memory_human"], + } def get_object_counts(): status = {} - status['queries_count'] = Query.query.count() + status["queries_count"] = Query.query.count() if settings.FEATURE_SHOW_QUERY_RESULTS_COUNT: - status['query_results_count'] = QueryResult.query.count() - status['unused_query_results_count'] = QueryResult.unused().count() - status['dashboards_count'] = Dashboard.query.count() - status['widgets_count'] = Widget.query.count() + status["query_results_count"] = QueryResult.query.count() + status["unused_query_results_count"] = QueryResult.unused().count() + status["dashboards_count"] = Dashboard.query.count() + status["widgets_count"] = Widget.query.count() return status @@ -31,19 +34,27 @@ def get_celery_queues(): scheduled_queue_names = db.session.query(DataSource.scheduled_queue_name).distinct() query = db.session.execute(union_all(queue_names, scheduled_queue_names)) - return ['celery'] + [row[0] for row in query] + return ["celery"] + [row[0] for row in query] def get_queues_status(): - return {**{queue: {'size': redis_connection.llen(queue)} for queue in get_celery_queues()}, - **{queue.name: {'size': len(queue)} for queue in Queue.all()}} + return { + **{ + queue: {"size": redis_connection.llen(queue)} + for queue in get_celery_queues() + }, + **{queue.name: {"size": len(queue)} for queue in Queue.all()}, + } def get_db_sizes(): database_metrics = [] queries = [ - ['Query Results Size', "select pg_total_relation_size('query_results') as size from (select 1) as a"], - ['Redash DB Size', "select pg_database_size('postgres') as size"] + [ + "Query Results Size", + "select pg_total_relation_size('query_results') as size from (select 1) as a", + ], + ["Redash DB Size", "select pg_database_size('postgres') as size"], ] for query_name, query in queries: result = db.session.execute(query).first() @@ -53,16 +64,13 @@ def get_db_sizes(): def get_status(): - status = { - 'version': __version__, - 'workers': [] - } + status = {"version": __version__, "workers": []} status.update(get_redis_status()) status.update(get_object_counts()) - status['manager'] = redis_connection.hgetall('redash:status') - status['manager']['queues'] = get_queues_status() - status['database_metrics'] = {} - status['database_metrics']['metrics'] = get_db_sizes() + status["manager"] = redis_connection.hgetall("redash:status") + status["manager"]["queues"] 
= get_queues_status() + status["database_metrics"] = {} + status["database_metrics"]["metrics"] = get_db_sizes() return status @@ -72,20 +80,20 @@ def get_waiting_in_queue(queue_name): for raw in redis_connection.lrange(queue_name, 0, -1): job = json_loads(raw) try: - args = json_loads(job['headers']['argsrepr']) - if args.get('query_id') == 'adhoc': - args['query_id'] = None + args = json_loads(job["headers"]["argsrepr"]) + if args.get("query_id") == "adhoc": + args["query_id"] = None except ValueError: args = {} job_row = { - 'state': 'waiting_in_queue', - 'task_name': job['headers']['task'], - 'worker': None, - 'worker_pid': None, - 'start_time': None, - 'task_id': job['headers']['id'], - 'queue': job['properties']['delivery_info']['routing_key'] + "state": "waiting_in_queue", + "task_name": job["headers"]["task"], + "worker": None, + "worker_pid": None, + "start_time": None, + "task_id": job["headers"]["id"], + "queue": job["properties"]["delivery_info"]["routing_key"], } job_row.update(args) @@ -99,23 +107,23 @@ def parse_tasks(task_lists, state): for task in itertools.chain(*task_lists.values()): task_row = { - 'state': state, - 'task_name': task['name'], - 'worker': task['hostname'], - 'queue': task['delivery_info']['routing_key'], - 'task_id': task['id'], - 'worker_pid': task['worker_pid'], - 'start_time': task['time_start'], + "state": state, + "task_name": task["name"], + "worker": task["hostname"], + "queue": task["delivery_info"]["routing_key"], + "task_id": task["id"], + "worker_pid": task["worker_pid"], + "start_time": task["time_start"], } - if task['name'] == 'redash.tasks.execute_query': + if task["name"] == "redash.tasks.execute_query": try: - args = json_loads(task['args']) + args = json_loads(task["args"]) except ValueError: args = {} - if args.get('query_id') == 'adhoc': - args['query_id'] = None + if args.get("query_id") == "adhoc": + args["query_id"] = None task_row.update(args) @@ -125,8 +133,8 @@ def parse_tasks(task_lists, state): def celery_tasks(): - tasks = parse_tasks(celery.control.inspect().active(), 'active') - tasks += parse_tasks(celery.control.inspect().reserved(), 'reserved') + tasks = parse_tasks(celery.control.inspect().active(), "active") + tasks += parse_tasks(celery.control.inspect().reserved(), "reserved") for queue_name in get_celery_queues(): tasks += get_waiting_in_queue(queue_name) @@ -135,46 +143,52 @@ def celery_tasks(): def fetch_jobs(queue, job_ids): - return [{ - 'id': job.id, - 'name': job.func_name, - 'queue': queue.name, - 'enqueued_at': job.enqueued_at, - 'started_at': job.started_at - } for job in Job.fetch_many(job_ids, connection=rq_redis_connection) if job is not None] + return [ + { + "id": job.id, + "name": job.func_name, + "queue": queue.name, + "enqueued_at": job.enqueued_at, + "started_at": job.started_at, + } + for job in Job.fetch_many(job_ids, connection=rq_redis_connection) + if job is not None + ] def rq_queues(): return { q.name: { - 'name': q.name, - 'started': fetch_jobs(q, StartedJobRegistry(queue=q).get_job_ids()), - 'queued': len(q.job_ids) - } for q in Queue.all()} + "name": q.name, + "started": fetch_jobs(q, StartedJobRegistry(queue=q).get_job_ids()), + "queued": len(q.job_ids), + } + for q in Queue.all() + } def describe_job(job): - return '{} ({})'.format(job.id, job.func_name.split(".").pop()) if job else None + return "{} ({})".format(job.id, job.func_name.split(".").pop()) if job else None def rq_workers(): - return [{ - 'name': w.name, - 'hostname': w.hostname, - 'pid': w.pid, - 'queues': ", ".join([q.name 
for q in w.queues]), - 'state': w.state, - 'last_heartbeat': w.last_heartbeat, - 'birth_date': w.birth_date, - 'current_job': describe_job(w.get_current_job()), - 'successful_jobs': w.successful_job_count, - 'failed_jobs': w.failed_job_count, - 'total_working_time': w.total_working_time - } for w in Worker.all()] + return [ + { + "name": w.name, + "hostname": w.hostname, + "pid": w.pid, + "queues": ", ".join([q.name for q in w.queues]), + "state": w.state, + "last_heartbeat": w.last_heartbeat, + "birth_date": w.birth_date, + "current_job": describe_job(w.get_current_job()), + "successful_jobs": w.successful_job_count, + "failed_jobs": w.failed_job_count, + "total_working_time": w.total_working_time, + } + for w in Worker.all() + ] def rq_status(): - return { - 'queues': rq_queues(), - 'workers': rq_workers() - } + return {"queues": rq_queues(), "workers": rq_workers()} diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index 9fe75ea2c3..b55eb24ef1 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -15,51 +15,56 @@ from redash.models.parameterized_query import ParameterizedQuery -from .query_result import serialize_query_result, serialize_query_result_to_csv, serialize_query_result_to_xlsx +from .query_result import ( + serialize_query_result, + serialize_query_result_to_csv, + serialize_query_result_to_xlsx, +) def public_widget(widget): res = { - 'id': widget.id, - 'width': widget.width, - 'options': json_loads(widget.options), - 'text': widget.text, - 'updated_at': widget.updated_at, - 'created_at': widget.created_at + "id": widget.id, + "width": widget.width, + "options": json_loads(widget.options), + "text": widget.text, + "updated_at": widget.updated_at, + "created_at": widget.created_at, } v = widget.visualization if v and v.id: - res['visualization'] = { - 'type': v.type, - 'name': v.name, - 'description': v.description, - 'options': json_loads(v.options), - 'updated_at': v.updated_at, - 'created_at': v.created_at, - 'query': { - 'id': v.query_rel.id, - 'name': v.query_rel.name, - 'description': v.query_rel.description, - 'options': v.query_rel.options - } + res["visualization"] = { + "type": v.type, + "name": v.name, + "description": v.description, + "options": json_loads(v.options), + "updated_at": v.updated_at, + "created_at": v.created_at, + "query": { + "id": v.query_rel.id, + "name": v.query_rel.name, + "description": v.query_rel.description, + "options": v.query_rel.options, + }, } return res def public_dashboard(dashboard): - dashboard_dict = project(serialize_dashboard(dashboard, with_favorite_state=False), ( - 'name', 'layout', 'dashboard_filters_enabled', 'updated_at', - 'created_at' - )) - - widget_list = (models.Widget.query - .filter(models.Widget.dashboard_id == dashboard.id) - .outerjoin(models.Visualization) - .outerjoin(models.Query)) - - dashboard_dict['widgets'] = [public_widget(w) for w in widget_list] + dashboard_dict = project( + serialize_dashboard(dashboard, with_favorite_state=False), + ("name", "layout", "dashboard_filters_enabled", "updated_at", "created_at"), + ) + + widget_list = ( + models.Widget.query.filter(models.Widget.dashboard_id == dashboard.id) + .outerjoin(models.Visualization) + .outerjoin(models.Query) + ) + + dashboard_dict["widgets"] = [public_widget(w) for w in widget_list] return dashboard_dict @@ -75,116 +80,137 @@ def __init__(self, object_or_list, **kwargs): def serialize(self): if isinstance(self.object_or_list, models.Query): result = serialize_query(self.object_or_list, 
**self.options) - if self.options.get('with_favorite_state', True) and not current_user.is_api_user(): - result['is_favorite'] = models.Favorite.is_favorite(current_user.id, self.object_or_list) + if ( + self.options.get("with_favorite_state", True) + and not current_user.is_api_user() + ): + result["is_favorite"] = models.Favorite.is_favorite( + current_user.id, self.object_or_list + ) else: - result = [serialize_query(query, **self.options) for query in self.object_or_list] - if self.options.get('with_favorite_state', True): - favorite_ids = models.Favorite.are_favorites(current_user.id, self.object_or_list) + result = [ + serialize_query(query, **self.options) for query in self.object_or_list + ] + if self.options.get("with_favorite_state", True): + favorite_ids = models.Favorite.are_favorites( + current_user.id, self.object_or_list + ) for query in result: - query['is_favorite'] = query['id'] in favorite_ids + query["is_favorite"] = query["id"] in favorite_ids return result -def serialize_query(query, with_stats=False, with_visualizations=False, with_user=True, with_last_modified_by=True): +def serialize_query( + query, + with_stats=False, + with_visualizations=False, + with_user=True, + with_last_modified_by=True, +): d = { - 'id': query.id, - 'latest_query_data_id': query.latest_query_data_id, - 'name': query.name, - 'description': query.description, - 'query': query.query_text, - 'query_hash': query.query_hash, - 'schedule': query.schedule, - 'api_key': query.api_key, - 'is_archived': query.is_archived, - 'is_draft': query.is_draft, - 'updated_at': query.updated_at, - 'created_at': query.created_at, - 'data_source_id': query.data_source_id, - 'options': query.options, - 'version': query.version, - 'tags': query.tags or [], - 'is_safe': query.parameterized.is_safe, + "id": query.id, + "latest_query_data_id": query.latest_query_data_id, + "name": query.name, + "description": query.description, + "query": query.query_text, + "query_hash": query.query_hash, + "schedule": query.schedule, + "api_key": query.api_key, + "is_archived": query.is_archived, + "is_draft": query.is_draft, + "updated_at": query.updated_at, + "created_at": query.created_at, + "data_source_id": query.data_source_id, + "options": query.options, + "version": query.version, + "tags": query.tags or [], + "is_safe": query.parameterized.is_safe, } if with_user: - d['user'] = query.user.to_dict() + d["user"] = query.user.to_dict() else: - d['user_id'] = query.user_id + d["user_id"] = query.user_id if with_last_modified_by: - d['last_modified_by'] = query.last_modified_by.to_dict() if query.last_modified_by is not None else None + d["last_modified_by"] = ( + query.last_modified_by.to_dict() + if query.last_modified_by is not None + else None + ) else: - d['last_modified_by_id'] = query.last_modified_by_id + d["last_modified_by_id"] = query.last_modified_by_id if with_stats: if query.latest_query_data is not None: - d['retrieved_at'] = query.retrieved_at - d['runtime'] = query.runtime + d["retrieved_at"] = query.retrieved_at + d["runtime"] = query.runtime else: - d['retrieved_at'] = None - d['runtime'] = None + d["retrieved_at"] = None + d["runtime"] = None if with_visualizations: - d['visualizations'] = [serialize_visualization(vis, with_query=False) - for vis in query.visualizations] + d["visualizations"] = [ + serialize_visualization(vis, with_query=False) + for vis in query.visualizations + ] return d def serialize_visualization(object, with_query=True): d = { - 'id': object.id, - 'type': object.type, - 'name': 
object.name, - 'description': object.description, - 'options': json_loads(object.options), - 'updated_at': object.updated_at, - 'created_at': object.created_at + "id": object.id, + "type": object.type, + "name": object.name, + "description": object.description, + "options": json_loads(object.options), + "updated_at": object.updated_at, + "created_at": object.created_at, } if with_query: - d['query'] = serialize_query(object.query_rel) + d["query"] = serialize_query(object.query_rel) return d def serialize_widget(object): d = { - 'id': object.id, - 'width': object.width, - 'options': json_loads(object.options), - 'dashboard_id': object.dashboard_id, - 'text': object.text, - 'updated_at': object.updated_at, - 'created_at': object.created_at + "id": object.id, + "width": object.width, + "options": json_loads(object.options), + "dashboard_id": object.dashboard_id, + "text": object.text, + "updated_at": object.updated_at, + "created_at": object.created_at, } if object.visualization and object.visualization.id: - d['visualization'] = serialize_visualization(object.visualization) + d["visualization"] = serialize_visualization(object.visualization) return d def serialize_alert(alert, full=True): d = { - 'id': alert.id, - 'name': alert.name, - 'options': alert.options, - 'state': alert.state, - 'last_triggered_at': alert.last_triggered_at, - 'updated_at': alert.updated_at, - 'created_at': alert.created_at, - 'rearm': alert.rearm + "id": alert.id, + "name": alert.name, + "options": alert.options, + "state": alert.state, + "last_triggered_at": alert.last_triggered_at, + "updated_at": alert.updated_at, + "created_at": alert.created_at, + "rearm": alert.rearm, } if full: - d['query'] = serialize_query(alert.query_rel) - d['user'] = alert.user.to_dict() + d["query"] = serialize_query(alert.query_rel) + d["user"] = alert.user.to_dict() else: - d['query_id'] = alert.query_id - d['user_id'] = alert.user_id + d["query_id"] = alert.query_id + d["user_id"] = alert.user_id return d @@ -201,34 +227,43 @@ def serialize_dashboard(obj, with_widgets=False, user=None, with_favorite_state= elif user and has_access(w.visualization.query_rel, user, view_only): widgets.append(serialize_widget(w)) else: - widget = project(serialize_widget(w), - ('id', 'width', 'dashboard_id', 'options', 'created_at', 'updated_at')) - widget['restricted'] = True + widget = project( + serialize_widget(w), + ( + "id", + "width", + "dashboard_id", + "options", + "created_at", + "updated_at", + ), + ) + widget["restricted"] = True widgets.append(widget) else: widgets = None d = { - 'id': obj.id, - 'slug': obj.slug, - 'name': obj.name, - 'user_id': obj.user_id, + "id": obj.id, + "slug": obj.slug, + "name": obj.name, + "user_id": obj.user_id, # TODO: we should properly load the users - 'user': obj.user.to_dict(), - 'layout': layout, - 'dashboard_filters_enabled': obj.dashboard_filters_enabled, - 'widgets': widgets, - 'is_archived': obj.is_archived, - 'is_draft': obj.is_draft, - 'tags': obj.tags or [], + "user": obj.user.to_dict(), + "layout": layout, + "dashboard_filters_enabled": obj.dashboard_filters_enabled, + "widgets": widgets, + "is_archived": obj.is_archived, + "is_draft": obj.is_draft, + "tags": obj.tags or [], # TODO: bulk load favorites - 'updated_at': obj.updated_at, - 'created_at': obj.created_at, - 'version': obj.version + "updated_at": obj.updated_at, + "created_at": obj.created_at, + "version": obj.version, } if with_favorite_state: - d['is_favorite'] = models.Favorite.is_favorite(current_user.id, obj) + d["is_favorite"] = 
models.Favorite.is_favorite(current_user.id, obj) return d @@ -239,7 +274,7 @@ def serialize_job(job): JobStatus.QUEUED: 1, JobStatus.STARTED: 2, JobStatus.FINISHED: 3, - JobStatus.FAILED: 4 + JobStatus.FAILED: 4, } job_status = job.get_status() @@ -254,16 +289,16 @@ def serialize_job(job): error = str(job.result) status = 4 elif job.is_cancelled: - error = 'Query execution cancelled.' + error = "Query execution cancelled." else: - error = '' + error = "" return { - 'job': { - 'id': job.id, - 'updated_at': updated_at, - 'status': status, - 'error': error, - 'query_result_id': job.result if job.is_finished and not error else None + "job": { + "id": job.id, + "updated_at": updated_at, + "status": status, + "error": error, + "query_result_id": job.result if job.is_finished and not error else None, } } diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py index c7e314d7a1..98df1d12ff 100644 --- a/redash/tasks/__init__.py +++ b/redash/tasks/__init__.py @@ -1,6 +1,18 @@ -from .general import record_event, version_check, send_mail, sync_user_details, purge_failed_jobs -from .queries import (enqueue_query, execute_query, refresh_queries, - refresh_schemas, cleanup_query_results, empty_schedules) +from .general import ( + record_event, + version_check, + send_mail, + sync_user_details, + purge_failed_jobs, +) +from .queries import ( + enqueue_query, + execute_query, + refresh_queries, + refresh_schemas, + cleanup_query_results, + empty_schedules, +) from .alerts import check_alerts_for_query from .failure_report import send_aggregated_errors from .worker import Worker, Queue, Job @@ -9,6 +21,7 @@ from redash import rq_redis_connection from rq.connections import push_connection, pop_connection + def init_app(app): app.before_request(lambda: push_connection(rq_redis_connection)) app.teardown_request(lambda _: pop_connection()) diff --git a/redash/tasks/queries/__init__.py b/redash/tasks/queries/__init__.py index f5ee125105..7c5d34fb0f 100644 --- a/redash/tasks/queries/__init__.py +++ b/redash/tasks/queries/__init__.py @@ -1,2 +1,7 @@ -from .maintenance import refresh_queries, refresh_schemas, cleanup_query_results, empty_schedules +from .maintenance import ( + refresh_queries, + refresh_schemas, + cleanup_query_results, + empty_schedules, +) from .execution import execute_query, enqueue_query diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 8242f1f8af..c7d88513d3 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -28,7 +28,9 @@ def _unlock(query_hash, data_source_id): redis_connection.delete(_job_lock_id(query_hash, data_source_id)) -def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}): +def enqueue_query( + query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={} +): query_hash = gen_query_hash(query) logging.info("Inserting job for %s with metadata=%s", query_hash, metadata) try_count = 0 @@ -48,7 +50,11 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query status = job.get_status() if status in [JobStatus.FINISHED, JobStatus.FAILED]: - logging.info("[%s] job found is ready (%s), removing lock", query_hash, status) + logging.info( + "[%s] job found is ready (%s), removing lock", + query_hash, + status, + ) redis_connection.delete(_job_lock_id(query_hash, data_source.id)) job = None @@ -62,18 +68,29 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query queue_name = 
data_source.queue_name scheduled_query_id = None - time_limit = settings.dynamic_settings.query_time_limit(scheduled_query, user_id, data_source.org_id) - metadata['Queue'] = queue_name + time_limit = settings.dynamic_settings.query_time_limit( + scheduled_query, user_id, data_source.org_id + ) + metadata["Queue"] = queue_name queue = Queue(queue_name) - job = queue.enqueue(execute_query, query, data_source.id, metadata, - user_id=user_id, - scheduled_query_id=scheduled_query_id, - is_api_key=is_api_key, - job_timeout=time_limit) + job = queue.enqueue( + execute_query, + query, + data_source.id, + metadata, + user_id=user_id, + scheduled_query_id=scheduled_query_id, + is_api_key=is_api_key, + job_timeout=time_limit, + ) logging.info("[%s] Created new job: %s", query_hash, job.id) - pipe.set(_job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME) + pipe.set( + _job_lock_id(query_hash, data_source.id), + job.id, + settings.JOB_EXPIRY_TIME, + ) pipe.execute() break @@ -111,14 +128,15 @@ def _resolve_user(user_id, is_api_key, query_id): class QueryExecutor(object): - def __init__(self, query, data_source_id, user_id, is_api_key, metadata, - scheduled_query): + def __init__( + self, query, data_source_id, user_id, is_api_key, metadata, scheduled_query + ): self.job = get_current_job() self.query = query self.data_source_id = data_source_id self.metadata = metadata self.data_source = self._load_data_source() - self.user = _resolve_user(user_id, is_api_key, metadata.get('Query ID')) + self.user = _resolve_user(user_id, is_api_key, metadata.get("Query ID")) # Close DB connection to prevent holding a connection for a long time while the query is executing. models.db.session.close() @@ -133,7 +151,7 @@ def run(self): started_at = time.time() logger.debug("Executing query:\n%s", self.query) - self._log_progress('executing_query') + self._log_progress("executing_query") query_runner = self.data_source.query_runner annotated_query = self._annotate_query(query_runner) @@ -147,48 +165,61 @@ def run(self): error = text_type(e) data = None - logging.warning('Unexpected error while running query:', exc_info=1) + logging.warning("Unexpected error while running query:", exc_info=1) run_time = time.time() - started_at - logger.info("job=execute_query query_hash=%s data_length=%s error=[%s]", - self.query_hash, data and len(data), error) + logger.info( + "job=execute_query query_hash=%s data_length=%s error=[%s]", + self.query_hash, + data and len(data), + error, + ) _unlock(self.query_hash, self.data_source.id) if error is not None and data is None: result = QueryExecutionError(error) if self.scheduled_query is not None: - self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False) + self.scheduled_query = models.db.session.merge( + self.scheduled_query, load=False + ) track_failure(self.scheduled_query, error) raise result else: - if (self.scheduled_query and self.scheduled_query.schedule_failures > 0): - self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False) + if self.scheduled_query and self.scheduled_query.schedule_failures > 0: + self.scheduled_query = models.db.session.merge( + self.scheduled_query, load=False + ) self.scheduled_query.schedule_failures = 0 models.db.session.add(self.scheduled_query) query_result = models.QueryResult.store_result( - self.data_source.org_id, self.data_source, - self.query_hash, self.query, data, - run_time, utcnow()) + self.data_source.org_id, + self.data_source, + self.query_hash, + self.query, + data, + 
run_time, + utcnow(), + ) updated_query_ids = models.Query.update_latest_result(query_result) models.db.session.commit() # make sure that alert sees the latest query result - self._log_progress('checking_alerts') + self._log_progress("checking_alerts") for query_id in updated_query_ids: check_alerts_for_query.delay(query_id) - self._log_progress('finished') + self._log_progress("finished") result = query_result.id models.db.session.commit() return result def _annotate_query(self, query_runner): - self.metadata['Job ID'] = self.job.id - self.metadata['Query Hash'] = self.query_hash - self.metadata['Scheduled'] = self.scheduled_query is not None + self.metadata["Job ID"] = self.job.id + self.metadata["Query Hash"] = self.query_hash + self.metadata["Scheduled"] = self.scheduled_query is not None return query_runner.annotate_query(self.query, self.metadata) @@ -196,11 +227,15 @@ def _log_progress(self, state): logger.info( "job=execute_query state=%s query_hash=%s type=%s ds_id=%d " "job_id=%s queue=%s query_id=%s username=%s", - state, self.query_hash, self.data_source.type, self.data_source.id, + state, + self.query_hash, + self.data_source.type, + self.data_source.id, self.job.id, - self.metadata.get('Queue', 'unknown'), - self.metadata.get('Query ID', 'unknown'), - self.metadata.get('Username', 'unknown')) + self.metadata.get("Queue", "unknown"), + self.metadata.get("Query ID", "unknown"), + self.metadata.get("Username", "unknown"), + ) def _load_data_source(self): logger.info("job=execute_query state=load_ds ds_id=%d", self.data_source_id) @@ -209,15 +244,23 @@ def _load_data_source(self): # user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted # jobs before the upgrade to this version. 
-def execute_query(query, data_source_id, metadata, user_id=None, - scheduled_query_id=None, is_api_key=False): +def execute_query( + query, + data_source_id, + metadata, + user_id=None, + scheduled_query_id=None, + is_api_key=False, +): if scheduled_query_id is not None: scheduled_query = models.Query.query.get(scheduled_query_id) else: scheduled_query = None try: - return QueryExecutor(query, data_source_id, user_id, is_api_key, metadata, scheduled_query).run() + return QueryExecutor( + query, data_source_id, user_id, is_api_key, metadata, scheduled_query + ).run() except QueryExecutionError as e: models.db.session.rollback() return e diff --git a/redash/tasks/schedule.py b/redash/tasks/schedule.py index b1a527cf42..2577c8db09 100644 --- a/redash/tasks/schedule.py +++ b/redash/tasks/schedule.py @@ -10,32 +10,38 @@ from rq_scheduler import Scheduler from redash import settings, rq_redis_connection -from redash.tasks import (sync_user_details, refresh_queries, - empty_schedules, refresh_schemas, - cleanup_query_results, purge_failed_jobs, - version_check, send_aggregated_errors) +from redash.tasks import ( + sync_user_details, + refresh_queries, + empty_schedules, + refresh_schemas, + cleanup_query_results, + purge_failed_jobs, + version_check, + send_aggregated_errors, +) logger = logging.getLogger(__name__) -rq_scheduler = Scheduler(connection=rq_redis_connection, - queue_name="periodic", - interval=5) +rq_scheduler = Scheduler( + connection=rq_redis_connection, queue_name="periodic", interval=5 +) def job_id(kwargs): metadata = kwargs.copy() - metadata['func'] = metadata['func'].__name__ + metadata["func"] = metadata["func"].__name__ return hashlib.sha1(json.dumps(metadata, sort_keys=True).encode()).hexdigest() def prep(kwargs): - interval = kwargs['interval'] + interval = kwargs["interval"] if isinstance(interval, timedelta): interval = int(interval.total_seconds()) - kwargs['interval'] = interval - kwargs['result_ttl'] = kwargs.get('result_ttl', interval * 2) + kwargs["interval"] = interval + kwargs["result_ttl"] = kwargs.get("result_ttl", interval * 2) return kwargs @@ -48,10 +54,21 @@ def periodic_job_definitions(): jobs = [ {"func": refresh_queries, "interval": 30, "result_ttl": 600}, {"func": empty_schedules, "interval": timedelta(minutes=60)}, - {"func": refresh_schemas, "interval": timedelta(minutes=settings.SCHEMAS_REFRESH_SCHEDULE)}, - {"func": sync_user_details, "timeout": 60, "ttl": 45, "interval": timedelta(minutes=1)}, + { + "func": refresh_schemas, + "interval": timedelta(minutes=settings.SCHEMAS_REFRESH_SCHEDULE), + }, + { + "func": sync_user_details, + "timeout": 60, + "ttl": 45, + "interval": timedelta(minutes=1), + }, {"func": purge_failed_jobs, "interval": timedelta(days=1)}, - {"func": send_aggregated_errors, "interval": timedelta(minutes=settings.SEND_FAILURE_EMAIL_INTERVAL)} + { + "func": send_aggregated_errors, + "interval": timedelta(minutes=settings.SEND_FAILURE_EMAIL_INTERVAL), + }, ] if settings.VERSION_CHECK: @@ -70,10 +87,14 @@ def schedule_periodic_jobs(jobs): job_definitions = [prep(job) for job in jobs] jobs_to_clean_up = Job.fetch_many( - set([job.id for job in rq_scheduler.get_jobs()]) - set([job_id(job) for job in job_definitions]), - rq_redis_connection) + set([job.id for job in rq_scheduler.get_jobs()]) + - set([job_id(job) for job in job_definitions]), + rq_redis_connection, + ) - jobs_to_schedule = [job for job in job_definitions if job_id(job) not in rq_scheduler] + jobs_to_schedule = [ + job for job in job_definitions if job_id(job) not in 
rq_scheduler + ] for job in jobs_to_clean_up: logger.info("Removing %s (%s) from schedule.", job.id, job.func_name) @@ -81,5 +102,10 @@ def schedule_periodic_jobs(jobs): job.delete() for job in jobs_to_schedule: - logger.info("Scheduling %s (%s) with interval %s.", job_id(job), job['func'].__name__, job.get('interval')) + logger.info( + "Scheduling %s (%s) with interval %s.", + job_id(job), + job["func"].__name__, + job.get("interval"), + ) schedule(job) diff --git a/redash/tasks/worker.py b/redash/tasks/worker.py index 425d10b1ea..9f6f93558c 100644 --- a/redash/tasks/worker.py +++ b/redash/tasks/worker.py @@ -12,14 +12,14 @@ class CancellableJob(BaseJob): def cancel(self, pipeline=None): # TODO - add tests that verify that queued jobs are removed from queue and running jobs are actively cancelled if self.is_started: - self.meta['cancelled'] = True + self.meta["cancelled"] = True self.save_meta() super().cancel(pipeline=pipeline) - + @property def is_cancelled(self): - return self.meta.get('cancelled', False) + return self.meta.get("cancelled", False) class CancellableQueue(BaseQueue): @@ -40,21 +40,27 @@ class HardLimitingWorker(BaseWorker): RQ Worker by checking if the work horse is still busy with the job, even after it should have timed out (+ a grace period of 15s). If it does, it kills the work horse. """ + grace_period = 15 queue_class = CancellableQueue job_class = CancellableJob def stop_executing_job(self, job): os.kill(self.horse_pid, signal.SIGINT) - self.log.warning('Job %s has been cancelled.', job.id) + self.log.warning("Job %s has been cancelled.", job.id) def soft_limit_exceeded(self, job): seconds_under_monitor = (utcnow() - self.monitor_started).seconds return seconds_under_monitor > job.timeout + self.grace_period def enforce_hard_limit(self, job): - self.log.warning('Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. ' - 'Killing the work horse.', job.id, job.timeout, self.grace_period) + self.log.warning( + "Job %s exceeded timeout of %ds (+%ds grace period) but work horse did not terminate it. 
" + "Killing the work horse.", + job.id, + job.timeout, + self.grace_period, + ) self.kill_horse() def monitor_work_horse(self, job): @@ -65,7 +71,9 @@ def monitor_work_horse(self, job): self.monitor_started = utcnow() while True: try: - with UnixSignalDeathPenalty(self.job_monitoring_interval, HorseMonitorTimeoutException): + with UnixSignalDeathPenalty( + self.job_monitoring_interval, HorseMonitorTimeoutException + ): retpid, ret_val = os.waitpid(self._horse_pid, 0) break except HorseMonitorTimeoutException: @@ -103,17 +111,20 @@ def monitor_work_horse(self, job): job.ended_at = utcnow() # Unhandled failure: move the job to the failed queue - self.log.warning(( - 'Moving job to FailedJobRegistry ' - '(work-horse terminated unexpectedly; waitpid returned {})' - ).format(ret_val)) + self.log.warning( + ( + "Moving job to FailedJobRegistry " + "(work-horse terminated unexpectedly; waitpid returned {})" + ).format(ret_val) + ) self.handle_job_failure( job, exc_string="Work-horse process was terminated unexpectedly " - "(waitpid returned %s)" % ret_val + "(waitpid returned %s)" % ret_val, ) + Job = CancellableJob Queue = CancellableQueue -Worker = HardLimitingWorker \ No newline at end of file +Worker = HardLimitingWorker diff --git a/tests/tasks/test_queries.py b/tests/tasks/test_queries.py index f0e9d176cc..24153bb91a 100644 --- a/tests/tasks/test_queries.py +++ b/tests/tasks/test_queries.py @@ -9,7 +9,11 @@ from redash import redis_connection, rq_redis_connection, models from redash.utils import json_dumps from redash.query_runner.pg import PostgreSQL -from redash.tasks.queries.execution import QueryExecutionError, enqueue_query, execute_query +from redash.tasks.queries.execution import ( + QueryExecutionError, + enqueue_query, + execute_query, +) from redash.tasks import Job @@ -29,41 +33,90 @@ def create_job(*args, **kwargs): return Job(connection=rq_redis_connection) -@patch('redash.tasks.queries.execution.Job.fetch', side_effect=fetch_job) -@patch('redash.tasks.queries.execution.Queue.enqueue', side_effect=create_job) +@patch("redash.tasks.queries.execution.Job.fetch", side_effect=fetch_job) +@patch("redash.tasks.queries.execution.Queue.enqueue", side_effect=create_job) class TestEnqueueTask(BaseTestCase): def test_multiple_enqueue_of_same_query(self, enqueue, _): query = self.factory.create_query() - + with Connection(rq_redis_connection): - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) - + enqueue_query( + query.query_text, + query.data_source, + query.user_id, + False, + query, + {"Username": "Arik", "Query ID": query.id}, + ) + enqueue_query( + query.query_text, + query.data_source, + query.user_id, + False, + query, + {"Username": "Arik", "Query ID": query.id}, + ) + enqueue_query( + query.query_text, + query.data_source, + query.user_id, + False, + query, + {"Username": "Arik", "Query ID": query.id}, + ) + self.assertEqual(1, enqueue.call_count) - @patch('redash.settings.dynamic_settings.query_time_limit', return_value=60) + @patch("redash.settings.dynamic_settings.query_time_limit", return_value=60) def test_limits_query_time(self, _, enqueue, __): query = self.factory.create_query() with Connection(rq_redis_connection): - enqueue_query(query.query_text, 
query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id}) + enqueue_query( + query.query_text, + query.data_source, + query.user_id, + False, + query, + {"Username": "Arik", "Query ID": query.id}, + ) _, kwargs = enqueue.call_args - self.assertEqual(60, kwargs.get('job_timeout')) + self.assertEqual(60, kwargs.get("job_timeout")) def test_multiple_enqueue_of_different_query(self, enqueue, _): query = self.factory.create_query() with Connection(rq_redis_connection): - enqueue_query(query.query_text, query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text + '2', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) - enqueue_query(query.query_text + '3', query.data_source, query.user_id, False, None, {'Username': 'Arik', 'Query ID': query.id}) + enqueue_query( + query.query_text, + query.data_source, + query.user_id, + False, + None, + {"Username": "Arik", "Query ID": query.id}, + ) + enqueue_query( + query.query_text + "2", + query.data_source, + query.user_id, + False, + None, + {"Username": "Arik", "Query ID": query.id}, + ) + enqueue_query( + query.query_text + "3", + query.data_source, + query.user_id, + False, + None, + {"Username": "Arik", "Query ID": query.id}, + ) self.assertEqual(3, enqueue.call_count) -@patch('redash.tasks.queries.execution.get_current_job', side_effect=fetch_job) +@patch("redash.tasks.queries.execution.get_current_job", side_effect=fetch_job) class QueryExecutorTests(BaseTestCase): def test_success(self, _): """ @@ -81,13 +134,14 @@ def test_success_scheduled(self, _): """ Scheduled queries remember their latest results. """ - q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) + q = self.factory.create_query( + query_text="SELECT 1, 2", schedule={"interval": 300} + ) with patch.object(PostgreSQL, "run_query") as qr: qr.return_value = ([1, 2], None) result_id = execute_query( - "SELECT 1, 2", - self.factory.data_source.id, {}, - scheduled_query_id=q.id) + "SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id + ) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 0) result = models.QueryResult.query.get(result_id) @@ -97,18 +151,22 @@ def test_failure_scheduled(self, _): """ Scheduled queries that fail have their failure recorded. """ - q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) + q = self.factory.create_query( + query_text="SELECT 1, 2", schedule={"interval": 300} + ) with patch.object(PostgreSQL, "run_query") as qr: qr.side_effect = ValueError("broken") - result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, - scheduled_query_id=q.id) + result = execute_query( + "SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id + ) self.assertTrue(isinstance(result, QueryExecutionError)) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 1) - result = execute_query("SELECT 1, 2", self.factory.data_source.id, {}, - scheduled_query_id=q.id) + result = execute_query( + "SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id + ) self.assertTrue(isinstance(result, QueryExecutionError)) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 2) @@ -117,20 +175,22 @@ def test_success_after_failure(self, _): """ Query execution success resets the failure counter. 
""" - q = self.factory.create_query(query_text="SELECT 1, 2", schedule={"interval": 300}) + q = self.factory.create_query( + query_text="SELECT 1, 2", schedule={"interval": 300} + ) with patch.object(PostgreSQL, "run_query") as qr: qr.side_effect = ValueError("broken") - result = execute_query("SELECT 1, 2", - self.factory.data_source.id, {}, - scheduled_query_id=q.id) + result = execute_query( + "SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id + ) self.assertTrue(isinstance(result, QueryExecutionError)) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 1) with patch.object(PostgreSQL, "run_query") as qr: qr.return_value = ([1, 2], None) - execute_query("SELECT 1, 2", - self.factory.data_source.id, {}, - scheduled_query_id=q.id) + execute_query( + "SELECT 1, 2", self.factory.data_source.id, {}, scheduled_query_id=q.id + ) q = models.Query.get_by_id(q.id) self.assertEqual(q.schedule_failures, 0) diff --git a/tests/tasks/test_schedule.py b/tests/tasks/test_schedule.py index 5a8b1d19b5..3e3329f62d 100644 --- a/tests/tasks/test_schedule.py +++ b/tests/tasks/test_schedule.py @@ -3,6 +3,7 @@ from redash.tasks.schedule import rq_scheduler, schedule_periodic_jobs + class TestSchedule(TestCase): def setUp(self): for job in rq_scheduler.get_jobs(): @@ -18,19 +19,18 @@ def foo(): jobs = [job for job in rq_scheduler.get_jobs()] self.assertEqual(len(jobs), 1) - self.assertTrue(jobs[0].func_name.endswith('foo')) - self.assertEqual(jobs[0].meta['interval'], 60) + self.assertTrue(jobs[0].func_name.endswith("foo")) + self.assertEqual(jobs[0].meta["interval"], 60) def test_doesnt_reschedule_an_existing_job(self): def foo(): pass schedule_periodic_jobs([{"func": foo, "interval": 60}]) - with patch('redash.tasks.rq_scheduler.schedule') as schedule: + with patch("redash.tasks.rq_scheduler.schedule") as schedule: schedule_periodic_jobs([{"func": foo, "interval": 60}]) schedule.assert_not_called() - def test_reschedules_a_modified_job(self): def foo(): pass @@ -41,8 +41,8 @@ def foo(): jobs = [job for job in rq_scheduler.get_jobs()] self.assertEqual(len(jobs), 1) - self.assertTrue(jobs[0].func_name.endswith('foo')) - self.assertEqual(jobs[0].meta['interval'], 120) + self.assertTrue(jobs[0].func_name.endswith("foo")) + self.assertEqual(jobs[0].meta["interval"], 120) def test_removes_jobs_that_are_no_longer_defined(self): def foo(): @@ -51,11 +51,14 @@ def foo(): def bar(): pass - schedule_periodic_jobs([{"func": foo, "interval": 60}, {"func": bar, "interval": 90}]) + schedule_periodic_jobs( + [{"func": foo, "interval": 60}, {"func": bar, "interval": 90}] + ) schedule_periodic_jobs([{"func": foo, "interval": 60}]) jobs = [job for job in rq_scheduler.get_jobs()] self.assertEqual(len(jobs), 1) - self.assertTrue(jobs[0].func_name.endswith('foo')) - self.assertEqual(jobs[0].meta['interval'], 60) \ No newline at end of file + self.assertTrue(jobs[0].func_name.endswith("foo")) + self.assertEqual(jobs[0].meta["interval"], 60) + From 33af0b3632b592607269aab1ce0d2ed434bbf41e Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Mon, 30 Dec 2019 11:42:06 +0000 Subject: [PATCH 24/24] run RQ on all queues when running in Cypress --- .circleci/docker-compose.cypress.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.circleci/docker-compose.cypress.yml b/.circleci/docker-compose.cypress.yml index 20cc9980e4..6781c3d5ed 100644 --- a/.circleci/docker-compose.cypress.yml +++ b/.circleci/docker-compose.cypress.yml @@ -31,7 +31,6 @@ services: REDASH_LOG_LEVEL: "INFO" REDASH_REDIS_URL: 
"redis://redis:6379/0" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" - QUEUES: "default periodic schemas" celery_worker: build: ../ command: celery_worker