-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Upgrade postgres check to psycopg3 #15411
Changes from all commits
dcaee7e
9793b74
f2a695c
1d4cb95
1cc2845
e2e31b1
3b63c9c
e9f1903
fd0987d
5c24f04
e9a2308
380104a
14dfdf2
349f6b1
3b7c879
ffcc8a4
6cb39b8
b2c9865
e72c545
302f51b
24f3aca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,6 +62,7 @@ prometheus-client,PyPI,Apache-2.0,Copyright 2015 The Prometheus Authors | |
protobuf,PyPI,BSD-3-Clause,Copyright 2008 Google Inc. | ||
protobuf,PyPI,BSD-3-Clause,Copyright 2008 Google Inc. All rights reserved. | ||
psutil,PyPI,BSD-3-Clause,"Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola'" | ||
psycopg,PyPI,LGPL-3.0-only,Copyright (C) 2020 The Psycopg Team | ||
psycopg2-binary,PyPI,BSD-3-Clause,Copyright 2013 Federico Di Gregorio | ||
psycopg2-binary,PyPI,LGPL-3.0-only,Copyright (C) 2013 Federico Di Gregorio | ||
pyasn1,PyPI,BSD-3-Clause,"Copyright (c) 2005-2019, Ilya Etingof <[email protected]>" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
import threading | ||
import time | ||
from concurrent.futures.thread import ThreadPoolExecutor | ||
from ipaddress import IPv4Address | ||
from itertools import chain | ||
from typing import Any, Callable, Dict, List, Tuple # noqa: F401 | ||
|
||
|
@@ -183,6 +184,8 @@ def default_json_event_encoding(o): | |
return float(o) | ||
if isinstance(o, (datetime.date, datetime.datetime)): | ||
return o.isoformat() | ||
if isinstance(o, IPv4Address): | ||
return str(o) | ||
raise TypeError | ||
|
||
|
||
|
@@ -259,6 +262,9 @@ def cancel(self): | |
Send a signal to cancel the job loop asynchronously. | ||
""" | ||
self._cancel_event.set() | ||
# after setting cancel event, wait for job loop to fully shutdown | ||
if self._job_loop_future: | ||
self._job_loop_future.result() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without this change, we see a race condition that can happen when multiple job loop threads are running and cancel is called. This can sometimes result in a segfault: tests/test_statements.py::test_async_job_enabled[True3-True1-True0] Fatal Python error: Segmentation fault Current thread 0x00007f4924d0b700 (most recent call first):
File "/home/ec2-user/.local/share/hatch/env/virtual/datadog-postgres/550Gv7FF/py3.9-12.1/lib/python3.9/site-packages/psycopg/cursor.py", line 516 in _select_current_result
File "/home/ec2-user/.local/share/hatch/env/virtual/datadog-postgres/550Gv7FF/py3.9-12.1/lib/python3.9/site-packages/psycopg/cursor.py", line 205 in _execute_gen
File "/home/ec2-user/.local/share/hatch/env/virtual/datadog-postgres/550Gv7FF/py3.9-12.1/lib/python3.9/site-packages/psycopg/connection.py", line 957 in wait
....
...
File "/home/ec2-user/dd/integrations-core/datadog_checks_base/datadog_checks/base/utils/tracking.py", line 71 in wrapper
File "/home/ec2-user/dd/integrations-core/postgres/datadog_checks/postgres/statement_samples.py", line 434 in run_job
File "/home/ec2-user/dd/integrations-core/datadog_checks_base/datadog_checks/base/utils/db/utils.py", line 348 in _run_job_traced
File "/home/ec2-user/dd/integrations-core/datadog_checks_base/datadog_checks/base/utils/db/utils.py", line 342 in _run_job_rate_limited
File "/home/ec2-user/dd/integrations-core/datadog_checks_base/datadog_checks/base/utils/db/utils.py", line 303 in _job_loop
File "/home/ec2-user/anaconda3/envs/agent39/lib/python3.9/concurrent/futures/thread.py", line 58 in run
File "/home/ec2-user/anaconda3/envs/agent39/lib/python3.9/concurrent/futures/thread.py", line 83 in _worker
This is because the call to cancel would simply set the cancel() threading event and then return immediately, which would result in a call to close_all_connections. This is a problem because setting the cancel event on the job loop thread does not guarantee that the thread isn't already executing at the time we end up calling close for all connections in the db pool. (We only check whether cancel is set [here](https://github.com/DataDog/integrations-core/blob/master/datadog_checks_base/datadog_checks/base/utils/db/utils.py#L290) and [here](https://github.com/DataDog/integrations-core/blob/master/datadog_checks_base/datadog_checks/base/utils/db/utils.py#L340).) Therefore, the only way to safely cancel all job loops is to do the following:
|
||
|
||
def run_job_loop(self, tags): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
|
||
import logging | ||
|
||
import psycopg2 | ||
from psycopg.rows import dict_row | ||
|
||
from datadog_checks.base.utils.db.sql import compute_sql_signature | ||
from datadog_checks.base.utils.tracking import tracked_method | ||
|
@@ -16,12 +16,12 @@ | |
PREPARE_STATEMENT_QUERY = 'PREPARE dd_{query_signature} AS {statement}' | ||
|
||
PARAM_TYPES_COUNT_QUERY = '''\ | ||
SELECT CARDINALITY(parameter_types) FROM pg_prepared_statements WHERE name = 'dd_{query_signature}' | ||
SELECT CARDINALITY(parameter_types) as count FROM pg_prepared_statements WHERE name = 'dd_{query_signature}' | ||
''' | ||
|
||
EXECUTE_PREPARED_STATEMENT_QUERY = 'EXECUTE dd_{prepared_statement}({generic_values})' | ||
|
||
EXPLAIN_QUERY = 'SELECT {explain_function}($stmt${statement}$stmt$)' | ||
EXPLAIN_QUERY = 'SELECT {explain_function}($stmt${statement}$stmt$) as explain_statement' | ||
|
||
|
||
def agent_check_getter(self): | ||
|
@@ -81,7 +81,7 @@ def explain_statement(self, dbname, statement, obfuscated_statement): | |
result = self._explain_prepared_statement(dbname, statement, obfuscated_statement, query_signature) | ||
self._deallocate_prepared_statement(dbname, query_signature) | ||
if result: | ||
return result[0][0][0] | ||
return result[0]['explain_statement'][0] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do prefer |
||
return None | ||
|
||
def _set_plan_cache_mode(self, dbname): | ||
|
@@ -112,7 +112,10 @@ def _get_number_of_parameters_for_prepared_statement(self, dbname, query_signatu | |
rows = self._execute_query_and_fetch_rows( | ||
dbname, PARAM_TYPES_COUNT_QUERY.format(query_signature=query_signature) | ||
) | ||
return rows[0][0] if rows else 0 | ||
count = 0 | ||
if rows and 'count' in rows[0]: | ||
count = rows[0]['count'] | ||
return count | ||
|
||
@tracked_method(agent_check_getter=agent_check_getter) | ||
def _explain_prepared_statement(self, dbname, statement, obfuscated_statement, query_signature): | ||
|
@@ -157,15 +160,14 @@ def _deallocate_prepared_statement(self, dbname, query_signature): | |
) | ||
|
||
def _execute_query(self, dbname, query): | ||
# Psycopg2 connections do not get closed when context ends; | ||
# leaving context will just mark the connection as inactive in MultiDatabaseConnectionPool | ||
with self._check.db_pool.get_connection(dbname, self._check._config.idle_connection_timeout) as conn: | ||
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: | ||
with conn.cursor(row_factory=dict_row) as cursor: | ||
logger.debug('Executing query=[%s]', query) | ||
cursor.execute(query) | ||
|
||
def _execute_query_and_fetch_rows(self, dbname, query): | ||
with self._check.db_pool.get_connection(dbname, self._check._config.idle_connection_timeout) as conn: | ||
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: | ||
with conn.cursor(row_factory=dict_row) as cursor: | ||
logger.debug('Executing query=[%s]', query) | ||
cursor.execute(query) | ||
return cursor.fetchall() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We've been seeing json serialization errors for a long time now due to some activity payloads containing
IPv4Address
objects