From 8c8f2785bdbc707bfe3edd969c4f021d2ea20dce Mon Sep 17 00:00:00 2001 From: Owais Lone Date: Wed, 20 Jan 2021 22:11:19 +0530 Subject: [PATCH] SQLAlchemy: Use SQL operation and DB name as the Span name (#254) Current instrumentation uses the entire SQL query as the operation name which makes traces very hard to read and understand in addition to introducing high-cardinality issues. This commit fixes the problem by using only the SQL operation name and the DB name instead of the entire query. --- CHANGELOG.md | 4 +- .../instrumentation/sqlalchemy/engine.py | 72 +++++++++++-------- .../tests/test_sqlalchemy.py | 4 +- .../tests/sqlalchemy_tests/mixins.py | 10 +-- .../tests/sqlalchemy_tests/test_instrument.py | 4 +- .../tests/sqlalchemy_tests/test_mysql.py | 2 +- .../tests/sqlalchemy_tests/test_postgres.py | 2 +- .../tests/sqlalchemy_tests/test_sqlite.py | 2 +- 8 files changed, 59 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0312ed9764..4ef56d7034 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,7 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-wsgi` Return `None` for `CarrierGetter` if key not found - ([#1374](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233)) + ([#233](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233)) - `opentelemetry-instrumentation-grpc` Comply with updated spec, rework tests ([#236](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/236)) - `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-falcon`, `opentelemetry-instrumentation-flask`, `opentelemetry-instrumentation-pyramid`, `opentelemetry-instrumentation-wsgi` Renamed `host.port` attribute to `net.host.port` @@ -57,6 +57,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235)) - `opentelemetry-exporter-datadog` `opentelemetry-sdk-extension-aws` Fix reference to ids_generator in sdk ([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235)) +- `opentelemetry-instrumentation-sqlalchemy` Use SQL operation and DB name as span name. + ([#254](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/254)) ## [0.16b1](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.16b1) - 2020-11-26 diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 86f52ad724..4843aa5543 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from sqlalchemy.event import listen +from sqlalchemy.event import listen # pylint: disable=no-name-in-module from opentelemetry import trace from opentelemetry.instrumentation.sqlalchemy.version import __version__ @@ -72,22 +72,38 @@ def __init__(self, tracer, engine): listen(engine, "after_cursor_execute", self._after_cur_exec) listen(engine, "handle_error", self._handle_error) + def _operation_name(self, db_name, statement): + parts = [] + if isinstance(statement, str): + # otel spec recommends against parsing SQL queries. We are not trying to parse SQL + # but simply truncating the statement to the first word. This covers probably >95% + # use cases and uses the SQL statement in span name correctly as per the spec. + # For some very special cases it might not record the correct statement if the SQL + # dialect is too weird but in any case it shouldn't break anything. + parts.append(statement.split()[0]) + if db_name: + parts.append(db_name) + if not parts: + return self.vendor + return " ".join(parts) + # pylint: disable=unused-argument def _before_cur_exec(self, conn, cursor, statement, *args): + attrs, found = _get_attributes_from_url(conn.engine.url) + if not found: + attrs = _get_attributes_from_cursor(self.vendor, cursor, attrs) + + db_name = attrs.get(_DB, "") self.current_span = self.tracer.start_span( - statement, kind=trace.SpanKind.CLIENT + self._operation_name(db_name, statement), + kind=trace.SpanKind.CLIENT, ) with self.tracer.use_span(self.current_span, end_on_exit=False): if self.current_span.is_recording(): self.current_span.set_attribute(_STMT, statement) self.current_span.set_attribute("db.system", self.vendor) - - if not _set_attributes_from_url( - self.current_span, conn.engine.url - ): - _set_attributes_from_cursor( - self.current_span, self.vendor, cursor - ) + for key, value in attrs.items(): + self.current_span.set_attribute(key, value) # pylint: disable=unused-argument def _after_cur_exec(self, conn, cursor, statement, *args): @@ -108,25 +124,22 @@ def _handle_error(self, context): self.current_span.end() -def _set_attributes_from_url(span: trace.Span, url): +def _get_attributes_from_url(url): """Set connection tags from the url. return true if successful.""" - if span.is_recording(): - if url.host: - span.set_attribute(_HOST, url.host) - if url.port: - span.set_attribute(_PORT, url.port) - if url.database: - span.set_attribute(_DB, url.database) - if url.username: - span.set_attribute(_USER, url.username) - - return bool(url.host) - - -def _set_attributes_from_cursor(span: trace.Span, vendor, cursor): + attrs = {} + if url.host: + attrs[_HOST] = url.host + if url.port: + attrs[_PORT] = url.port + if url.database: + attrs[_DB] = url.database + if url.username: + attrs[_USER] = url.username + return attrs, bool(url.host) + + +def _get_attributes_from_cursor(vendor, cursor, attrs): """Attempt to set db connection attributes by introspecting the cursor.""" - if not span.is_recording(): - return if vendor == "postgresql": # pylint: disable=import-outside-toplevel from psycopg2.extensions import parse_dsn @@ -135,6 +148,7 @@ def _set_attributes_from_cursor(span: trace.Span, vendor, cursor): dsn = getattr(cursor.connection, "dsn", None) if dsn: data = parse_dsn(dsn) - span.set_attribute(_DB, data.get("dbname")) - span.set_attribute(_HOST, data.get("host")) - span.set_attribute(_PORT, int(data.get("port"))) + attrs[_DB] = data.get("dbname") + attrs[_HOST] = data.get("host") + attrs[_PORT] = int(data.get("port")) + return attrs diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py index dfc6e429a7..673f401be5 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py @@ -35,7 +35,7 @@ def test_trace_integration(self): spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].name, "SELECT 1 + 1;") + self.assertEqual(spans[0].name, "SELECT :memory:") self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT) def test_not_recording(self): @@ -68,5 +68,5 @@ def test_create_engine_wrapper(self): spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].name, "SELECT 1 + 1;") + self.assertEqual(spans[0].name, "SELECT :memory:") self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT) diff --git a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/mixins.py b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/mixins.py index b2e9f24e3d..62e9b3622a 100644 --- a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/mixins.py +++ b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/mixins.py @@ -110,6 +110,8 @@ def tearDown(self): super().tearDown() def _check_span(self, span, name): + if self.SQL_DB: + name = "{0} {1}".format(name, self.SQL_DB) self.assertEqual(span.name, name) self.assertEqual(span.attributes.get(_DB), self.SQL_DB) self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET) @@ -129,7 +131,7 @@ def test_orm_insert(self): stmt += "(?, ?)" else: stmt += "(%(id)s, %(name)s)" - self._check_span(span, stmt) + self._check_span(span, "INSERT") self.assertIn("INSERT INTO players", span.attributes.get(_STMT)) self.check_meta(span) @@ -146,7 +148,7 @@ def test_session_query(self): stmt += "?" else: stmt += "%(name_1)s" - self._check_span(span, stmt) + self._check_span(span, "SELECT") self.assertIn( "SELECT players.id AS players_id, players.name AS players_name \nFROM players \nWHERE players.name", span.attributes.get(_STMT), @@ -163,7 +165,7 @@ def test_engine_connect_execute(self): spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 1) span = spans[0] - self._check_span(span, stmt) + self._check_span(span, "SELECT") self.assertEqual(span.attributes.get(_STMT), "SELECT * FROM players") self.check_meta(span) @@ -188,4 +190,4 @@ def test_parent(self): self.assertEqual(parent_span.name, "sqlalch_op") self.assertEqual(parent_span.instrumentation_info.name, "sqlalch_svc") - self.assertEqual(child_span.name, stmt) + self.assertEqual(child_span.name, "SELECT " + self.SQL_DB) diff --git a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_instrument.py b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_instrument.py index 3da823d5ba..ac5b8f9389 100644 --- a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_instrument.py +++ b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_instrument.py @@ -56,7 +56,7 @@ def tearDown(self): def test_engine_traced(self): # ensures that the engine is traced - rows = self.conn.execute("SELECT 1").fetchall() + rows = self.conn.execute("SELECT").fetchall() self.assertEqual(len(rows), 1) traces = self.memory_exporter.get_finished_spans() @@ -64,6 +64,6 @@ def test_engine_traced(self): self.assertEqual(len(traces), 1) span = traces[0] # check subset of span fields - self.assertEqual(span.name, "SELECT 1") + self.assertEqual(span.name, "SELECT opentelemetry-tests") self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET) self.assertGreater((span.end_time - span.start_time), 0) diff --git a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_mysql.py b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_mysql.py index e37fa4d51e..31942dd582 100644 --- a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_mysql.py +++ b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_mysql.py @@ -67,7 +67,7 @@ def test_engine_execute_errors(self): self.assertEqual(len(spans), 1) span = spans[0] # span fields - self.assertEqual(span.name, "SELECT * FROM a_wrong_table") + self.assertEqual(span.name, "SELECT opentelemetry-tests") self.assertEqual( span.attributes.get(_STMT), "SELECT * FROM a_wrong_table" ) diff --git a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_postgres.py b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_postgres.py index 2642d52a74..f9c38b1511 100644 --- a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_postgres.py +++ b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_postgres.py @@ -65,7 +65,7 @@ def test_engine_execute_errors(self): self.assertEqual(len(spans), 1) span = spans[0] # span fields - self.assertEqual(span.name, "SELECT * FROM a_wrong_table") + self.assertEqual(span.name, "SELECT opentelemetry-tests") self.assertEqual( span.attributes.get(_STMT), "SELECT * FROM a_wrong_table" ) diff --git a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_sqlite.py b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_sqlite.py index a3fae96bb8..2f64642486 100644 --- a/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_sqlite.py +++ b/tests/opentelemetry-docker-tests/tests/sqlalchemy_tests/test_sqlite.py @@ -43,7 +43,7 @@ def test_engine_execute_errors(self): self.assertEqual(len(spans), 1) span = spans[0] # span fields - self.assertEqual(span.name, stmt) + self.assertEqual(span.name, "SELECT :memory:") self.assertEqual( span.attributes.get(_STMT), "SELECT * FROM a_wrong_table" )