Skip to content

Commit

Permalink
Wait for Presto to actually be ready to run queries
Browse files Browse the repository at this point in the history
  • Loading branch information
evansd committed Jul 1, 2020
1 parent 94092f2 commit 5806baf
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
20 changes: 8 additions & 12 deletions cohortextractor/presto_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
import requests


class PrestoNoActiveNodesError(Exception):
pass


def presto_connection_from_url(url):
return ConnectionProxy(
prestodb.dbapi.connect(**presto_connection_params_from_url(url))
Expand Down Expand Up @@ -40,24 +36,24 @@ def presto_connection_params_from_url(url):
return connection_params


def wait_for_presto_to_be_ready(url, timeout):
def wait_for_presto_to_be_ready(url, test_query, timeout):
"""
Waits for Presto to be ready to execute queries by repeatedly attempting to
connect and run `test_query`, raising the last received error after
`timeout` seconds
"""
connection_params = presto_connection_params_from_url(url)
start = time.time()
while True:
try:
connection = prestodb.dbapi.connect(**connection_params)
cursor = connection.cursor()
cursor.execute(
"SELECT COUNT(*) FROM system.runtime.nodes WHERE state = 'active'"
)
count = cursor.fetchone()[0]
if count == 0:
raise PrestoNoActiveNodesError("No active nodes found")
cursor.execute(test_query)
cursor.fetchall()
break
except (
prestodb.exceptions.PrestoQueryError,
requests.exceptions.ConnectionError,
PrestoNoActiveNodesError,
):
if time.time() - start < timeout:
time.sleep(1)
Expand Down
12 changes: 11 additions & 1 deletion tests/acme_backend_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,17 @@ def make_engine():
time.sleep(1)
else:
raise
wait_for_presto_to_be_ready(os.environ["ACME_DATABASE_URL"], timeout)
wait_for_presto_to_be_ready(
os.environ["ACME_DATABASE_URL"],
# Presto will show active nodes in its `system.runtime.nodes` table but
# then throw a "no nodes available" error if you try to execute a query
# which needs to touch the MSSQL instance. So to properly confirm that
# Presto is ready we need a query which forces it to connect to MSSQL,
# but ideally one which doesn't depend on any particular configuration
# having been done first. The below seems to do the trick.
"SELECT 1 FROM sys.tables",
timeout,
)
return engine


Expand Down

0 comments on commit 5806baf

Please sign in to comment.