From 32018f70290246213a7bea031fa0ab899e866479 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Wed, 21 Jun 2023 16:51:17 -0700 Subject: [PATCH] refactor!: Move shared sql behavior to helper gems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mysql2, trilogy, and pg instrumentation contained duplicated constants and methods. This refactor creates two new gems, opentelemetry-helpers-mysql and opentelemetry-helpers-sql-obfuscation, to hold the shared code. It also: * Improves SQL statement query finding for users of Marginalia/Active Record Query Logs
 * UTF-8 encodes MySQL statements before extraction * Adds fixtures from New Relic’s SQL obfuscation tests * Adjusts regex queries to support multiple lines * Adds obfuscation support for SQLite, Apache Cassandra, and Oracle --- .github/dependabot.yml | 8 + .github/workflows/ci-contrib-canary.yml | 47 ++ .github/workflows/ci-contrib.yml | 39 + .toys/.data/releases.yml | 8 + README.md | 7 + helpers/mysql/.rubocop.yml | 4 + helpers/mysql/.yardopts | 9 + helpers/mysql/CHANGELOG.md | 1 + helpers/mysql/Gemfile | 9 + helpers/mysql/LICENSE | 201 +++++ helpers/mysql/README.md | 56 ++ helpers/mysql/Rakefile | 28 + .../mysql/lib/opentelemetry-helpers-mysql.rb | 7 + helpers/mysql/lib/opentelemetry/helpers.rb | 15 + .../mysql/lib/opentelemetry/helpers/mysql.rb | 91 +++ .../opentelemetry/helpers/mysql/version.rb | 13 + .../mysql/opentelemetry-helpers-mysql.gemspec | 47 ++ helpers/mysql/test/helpers/mysql_test.rb | 162 +++++ helpers/mysql/test/test_helper.rb | 14 + helpers/sql-obfuscation/.rubocop.yml | 4 + helpers/sql-obfuscation/.yardopts | 9 + helpers/sql-obfuscation/CHANGELOG.md | 1 + helpers/sql-obfuscation/Gemfile | 9 + helpers/sql-obfuscation/LICENSE | 201 +++++ helpers/sql-obfuscation/README.md | 62 ++ helpers/sql-obfuscation/Rakefile | 28 + .../opentelemetry-helpers-sql-obfuscation.rb | 7 + .../lib/opentelemetry/helpers.rb | 14 + .../opentelemetry/helpers/sql_obfuscation.rb | 139 ++++ .../helpers/sql_obfuscation/version.rb | 13 + ...ntelemetry-helpers-sql-obfuscation.gemspec | 45 ++ .../test/fixtures/sql_obfuscation.json | 685 ++++++++++++++++++ .../test/helpers/sql_obfuscation_test.rb | 108 +++ helpers/sql-obfuscation/test/test_helper.rb | 11 + instrumentation/all/Gemfile | 6 + instrumentation/mysql2/Gemfile | 2 + instrumentation/mysql2/example/mysql2.rb | 4 +- .../opentelemetry/instrumentation/mysql2.rb | 10 +- .../instrumentation/mysql2/patches/client.rb | 115 +-- ...entelemetry-instrumentation-mysql2.gemspec | 2 + .../mysql2/instrumentation_test.rb | 2 +- 
instrumentation/pg/Gemfile | 1 + .../instrumentation/pg/constants.rb | 23 - .../instrumentation/pg/patches/connection.rb | 38 +- .../opentelemetry-instrumentation-pg.gemspec | 1 + instrumentation/que/Gemfile | 3 +- instrumentation/trilogy/Gemfile | 2 + .../instrumentation/trilogy/patches/client.rb | 118 +-- ...ntelemetry-instrumentation-trilogy.gemspec | 2 + .../trilogy/instrumentation_test.rb | 2 +- 50 files changed, 2171 insertions(+), 262 deletions(-) create mode 100644 helpers/mysql/.rubocop.yml create mode 100644 helpers/mysql/.yardopts create mode 100644 helpers/mysql/CHANGELOG.md create mode 100644 helpers/mysql/Gemfile create mode 100644 helpers/mysql/LICENSE create mode 100644 helpers/mysql/README.md create mode 100644 helpers/mysql/Rakefile create mode 100644 helpers/mysql/lib/opentelemetry-helpers-mysql.rb create mode 100644 helpers/mysql/lib/opentelemetry/helpers.rb create mode 100644 helpers/mysql/lib/opentelemetry/helpers/mysql.rb create mode 100644 helpers/mysql/lib/opentelemetry/helpers/mysql/version.rb create mode 100644 helpers/mysql/opentelemetry-helpers-mysql.gemspec create mode 100644 helpers/mysql/test/helpers/mysql_test.rb create mode 100644 helpers/mysql/test/test_helper.rb create mode 100644 helpers/sql-obfuscation/.rubocop.yml create mode 100644 helpers/sql-obfuscation/.yardopts create mode 100644 helpers/sql-obfuscation/CHANGELOG.md create mode 100644 helpers/sql-obfuscation/Gemfile create mode 100644 helpers/sql-obfuscation/LICENSE create mode 100644 helpers/sql-obfuscation/README.md create mode 100644 helpers/sql-obfuscation/Rakefile create mode 100644 helpers/sql-obfuscation/lib/opentelemetry-helpers-sql-obfuscation.rb create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers.rb create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation.rb create mode 100644 helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation/version.rb create mode 100644 
helpers/sql-obfuscation/opentelemetry-helpers-sql-obfuscation.gemspec create mode 100644 helpers/sql-obfuscation/test/fixtures/sql_obfuscation.json create mode 100644 helpers/sql-obfuscation/test/helpers/sql_obfuscation_test.rb create mode 100644 helpers/sql-obfuscation/test/test_helper.rb diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5485d59f1..9e4ed404b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,6 +9,14 @@ updates: directory: "/" schedule: interval: weekly +- package-ecosystem: bundler + directory: "/helpers/mysql" + schedule: + interval: weekly +- package-ecosystem: bundler + directory: "/helpers/sql-obfuscation" + schedule: + interval: weekly - package-ecosystem: bundler directory: "/propagator/ottrace" schedule: diff --git a/.github/workflows/ci-contrib-canary.yml b/.github/workflows/ci-contrib-canary.yml index 8ea179d8d..68dc07d1a 100644 --- a/.github/workflows/ci-contrib-canary.yml +++ b/.github/workflows/ci-contrib-canary.yml @@ -6,6 +6,53 @@ on: - cron: "0 0 * * *" jobs: + helpers: + strategy: + fail-fast: false + matrix: + gem: + - mysql + - sql-obfuscation + os: + - ubuntu-latest + - macos-latest + - windows-latest + name: "helpers-${{ matrix.gem }} / ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - name: "Test Ruby 3.2" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "3.2" + - name: "Test Ruby 3.1" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "3.1" + - name: "Test Ruby 3.0" + if: "${{ matrix.os == 'ubuntu-latest' }}" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "3.0" + yard: true + rubocop: true + build: true + - name: "Test JRuby" + if: "${{ matrix.os == 'ubuntu-latest' }}" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "jruby-9.4.2.0" + - name: "Test 
truffleruby" + if: "${{ matrix.os == 'ubuntu-latest' }}" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "truffleruby" + propagators: strategy: fail-fast: false diff --git a/.github/workflows/ci-contrib.yml b/.github/workflows/ci-contrib.yml index b00658894..943a8a854 100644 --- a/.github/workflows/ci-contrib.yml +++ b/.github/workflows/ci-contrib.yml @@ -10,6 +10,45 @@ on: - main jobs: + helpers: + strategy: + fail-fast: false + matrix: + gem: + - mysql + - sql-obfuscation + os: + - ubuntu-latest + name: "helpers-${{ matrix.gem }} / ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - name: "Test Ruby 3.2" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "3.2" + - name: "Test Ruby 3.1" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "3.1" + - name: "Test Ruby 3.0" + if: "${{ matrix.os == 'ubuntu-latest' }}" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "3.0" + yard: true + rubocop: true + build: true + - name: "Test JRuby" + if: "${{ matrix.os == 'ubuntu-latest' }}" + uses: ./.github/actions/test_gem + with: + gem: "opentelemetry-helpers-${{ matrix.gem }}" + ruby: "jruby-9.4.2.0" + propagators: strategy: fail-fast: false diff --git a/.toys/.data/releases.yml b/.toys/.data/releases.yml index 1bacd4b35..fcbdabbb0 100644 --- a/.toys/.data/releases.yml +++ b/.toys/.data/releases.yml @@ -34,6 +34,14 @@ gems: directory: instrumentation/gruf version_constant: [OpenTelemetry, Instrumentation, Gruf, VERSION] + - name: opentelemetry-helpers-mysql + directory: helpers/mysql + version_constant: [OpenTelemetry, Helpers, MySQL, VERSION] + + - name: opentelemetry-helpers-sql-obfuscation + directory: helpers/sql-obfuscation + version_constant: [OpenTelemetry, Helpers, SqlObfuscation, VERSION] + - name: opentelemetry-instrumentation-grape 
directory: instrumentation/grape version_constant: [OpenTelemetry, Instrumentation, Grape, VERSION] diff --git a/README.md b/README.md index f63167879..291f36bbc 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,13 @@ gems, including Rails, Rack, Sinatra, and others, so you can start using OpenTelemetry with minimal changes to your application. See the [instrumentation README](instrumentation/) for more details. +## Helpers + +This repository also contains libraries that hold code shared among +multiple instrumentation libraries. + +- [MySQL](helpers/mysql/) +- [SQL Obfuscation](helpers/sql-obfuscation/) ## Additional Libraries This repository also contains libraries to aid with interoperablity with vendor specific tracing solutions: diff --git a/helpers/mysql/.rubocop.yml b/helpers/mysql/.rubocop.yml new file mode 100644 index 000000000..4b31975de --- /dev/null +++ b/helpers/mysql/.rubocop.yml @@ -0,0 +1,4 @@ +inherit_from: ../../.rubocop.yml + +Gemspec/DevelopmentDependencies: + Enabled: false diff --git a/helpers/mysql/.yardopts b/helpers/mysql/.yardopts new file mode 100644 index 000000000..5e947205e --- /dev/null +++ b/helpers/mysql/.yardopts @@ -0,0 +1,9 @@ +--no-private +--title=OpenTelemetry MySQL Instrumentation Helpers +--markup=markdown +--main=README.md +./lib/opentelemetry/helpers/**/*.rb +./lib/opentelemetry/helpers.rb +- +README.md +CHANGELOG.md diff --git a/helpers/mysql/CHANGELOG.md b/helpers/mysql/CHANGELOG.md new file mode 100644 index 000000000..9c3a11771 --- /dev/null +++ b/helpers/mysql/CHANGELOG.md @@ -0,0 +1 @@ +# Release History: opentelemetry-helpers-mysql diff --git a/helpers/mysql/Gemfile b/helpers/mysql/Gemfile new file mode 100644 index 000000000..f649e2f64 --- /dev/null +++ b/helpers/mysql/Gemfile @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +source 'https://rubygems.org' + +gemspec diff --git a/helpers/mysql/LICENSE b/helpers/mysql/LICENSE new 
file mode 100644 index 000000000..1ef7dad2c --- /dev/null +++ b/helpers/mysql/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright The OpenTelemetry Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/helpers/mysql/README.md b/helpers/mysql/README.md new file mode 100644 index 000000000..dcfaab979 --- /dev/null +++ b/helpers/mysql/README.md @@ -0,0 +1,56 @@ +# OpenTelemetry Instrumentation Helpers: MySQL + +This Ruby gem contains logic shared among MySQL adapter libraries, such as mysql2 and trilogy. It's intended for use by gem authors instrumenting MySQL adapter libraries. 
+ +## Usage + +Add the gem to your instrumentation's gemspec file: + +```ruby +# opentelemetry-instrumentation-your-gem.gemspec + spec.add_dependency 'opentelemetry-helpers-mysql' +``` + +Add the gem to your instrumentation's Gemfile: + +```ruby +# Gemfile + +group :test, :development do + gem 'opentelemetry-helpers-mysql', path: '../../helpers/mysql' +end +``` + +Make sure the `Instrumentation` class for your gem contains configuration options for: +- `:span_name`: The type of span name desired for the trace. + Example: `option :span_name, default: :statement_type, validate: %I[statement_type db_name db_operation_and_name]` + +## Examples + +To set the span name in your library: + +```ruby +tracer.in_span( + OpenTelemetry::Helpers::MySQL.database_span_name(sql, operation, database_name, config), + attributes: attributes.merge!(OpenTelemetry::Instrumentation::Mysql2.attributes), + kind: :client +) do + super(sql, options) +end +``` + +## How can I get involved? + +The `opentelemetry-helpers-mysql` gem source is [on github][repo-github], along with related gems including `opentelemetry-instrumentation-mysql2` and `opentelemetry-instrumentation-trilogy`. + +The OpenTelemetry Ruby gems are maintained by the OpenTelemetry-Ruby special interest group (SIG). You can get involved by joining us on our [gitter channel][ruby-gitter] or attending our weekly meeting. See the [meeting calendar][community-meetings] for dates and times. For more information on this and other language SIGs, see the OpenTelemetry [community page][ruby-sig]. + +## License + +The `opentelemetry-helpers-mysql` gem is distributed under the Apache 2.0 license. See [LICENSE][license-github] for more information. 
+ +[repo-github]: https://github.com/open-telemetry/opentelemetry-ruby-contrib +[license-github]: https://github.com/open-telemetry/opentelemetry-ruby-contrib/blob/main/LICENSE +[ruby-sig]: https://github.com/open-telemetry/community#ruby-sig +[community-meetings]: https://github.com/open-telemetry/community#community-meetings +[ruby-gitter]: https://gitter.im/open-telemetry/opentelemetry-ruby diff --git a/helpers/mysql/Rakefile b/helpers/mysql/Rakefile new file mode 100644 index 000000000..1a64ba842 --- /dev/null +++ b/helpers/mysql/Rakefile @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'bundler/gem_tasks' +require 'rake/testtask' +require 'yard' +require 'rubocop/rake_task' + +RuboCop::RakeTask.new + +Rake::TestTask.new :test do |t| + t.libs << 'test' + t.libs << 'lib' + t.test_files = FileList['test/**/*_test.rb'] +end + +YARD::Rake::YardocTask.new do |t| + t.stats_options = ['--list-undoc'] +end + +if RUBY_ENGINE == 'truffleruby' + task default: %i[test] +else + task default: %i[test rubocop yard] +end diff --git a/helpers/mysql/lib/opentelemetry-helpers-mysql.rb b/helpers/mysql/lib/opentelemetry-helpers-mysql.rb new file mode 100644 index 000000000..309ce4492 --- /dev/null +++ b/helpers/mysql/lib/opentelemetry-helpers-mysql.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require_relative 'opentelemetry/helpers/mysql' diff --git a/helpers/mysql/lib/opentelemetry/helpers.rb b/helpers/mysql/lib/opentelemetry/helpers.rb new file mode 100644 index 000000000..0c336fa32 --- /dev/null +++ b/helpers/mysql/lib/opentelemetry/helpers.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'opentelemetry' +require 'opentelemetry/helpers/mysql' + +module OpenTelemetry + # The helpers module contains functionality 
shared across multiple + # instrumentation libraries + module Helpers + end +end diff --git a/helpers/mysql/lib/opentelemetry/helpers/mysql.rb b/helpers/mysql/lib/opentelemetry/helpers/mysql.rb new file mode 100644 index 000000000..27eeb3984 --- /dev/null +++ b/helpers/mysql/lib/opentelemetry/helpers/mysql.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 +require 'opentelemetry-common' + +module OpenTelemetry + module Helpers + # This module contains helpers for MySQL instrumentation libraries, like mysql2 and + # trilogy. It is intended for use by instrumentation developers and not to + # be called directly by an application. + # + # To use this in your instrumentation, the `Instrumentation` class for + # your gem must contain configuration options for: + # * `:span_name` + # Example: + # `option :span_name, default: :statement_type, validate: %I[statement_type db_name db_operation_and_name]` + # + # @api public + module MySQL + module_function + + QUERY_NAMES = [ + 'set names', + 'select', + 'insert', + 'update', + 'delete', + 'begin', + 'commit', + 'rollback', + 'savepoint', + 'release savepoint', + 'explain', + 'drop database', + 'drop table', + 'create database', + 'create table' + ].freeze + + # Ignore query names that might appear in comments prepended to the + # statement. + PREPENDED_COMMENTS_REGEX = %r{(?:\/\*.*?\*\/)}m + QUERY_NAME_REGEX = Regexp.new("^\s*(?:#{PREPENDED_COMMENTS_REGEX})?\s*\\b(#{QUERY_NAMES.join('|')})\\b.*", Regexp::IGNORECASE) + + # This is a span naming utility intended for use in MySQL database + # adapter instrumentation. + # + # @param sql [String] The SQL statement for the span. + # @param operation [String] The database operation. + # @param database_name [String] The name of the database. + # @param config [Hash] The user's configuration for the database adapter. 
+ # Desired keys: + # * `:span_name` => A symbol describing the type of name desired. Expected options are `:statement_type`, `:db_name`, and `:db_operation_and_name`. A nil or unknown `:span_name` will return 'mysql' as the span name + # @return [String] The span name. + # @api public + def database_span_name(sql, operation, database_name, config) + case config[:span_name] + when :statement_type + extract_statement_type(sql) + when :db_name + database_name + when :db_operation_and_name + db_operation_and_name(operation, database_name) + end || 'mysql' + end + + # @api private + def extract_statement_type(sql) + sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) + + QUERY_NAME_REGEX.match(sql) { |match| match[1].downcase } unless sql.nil? + rescue StandardError => e + OpenTelemetry.handle_error(message: 'Error extracting SQL statement type', exception: e) + nil + end + + # @api private + def db_operation_and_name(operation, database_name) + if operation && database_name + "#{operation} #{database_name}" + elsif operation + operation + elsif database_name + database_name + end + end + end + end +end diff --git a/helpers/mysql/lib/opentelemetry/helpers/mysql/version.rb b/helpers/mysql/lib/opentelemetry/helpers/mysql/version.rb new file mode 100644 index 000000000..f66da3eea --- /dev/null +++ b/helpers/mysql/lib/opentelemetry/helpers/mysql/version.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +module OpenTelemetry + module Helpers + module MySQL + VERSION = '0.1.0' + end + end +end diff --git a/helpers/mysql/opentelemetry-helpers-mysql.gemspec b/helpers/mysql/opentelemetry-helpers-mysql.gemspec new file mode 100644 index 000000000..6b2882279 --- /dev/null +++ b/helpers/mysql/opentelemetry-helpers-mysql.gemspec @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +lib = 
File.expand_path('lib', __dir__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'opentelemetry/helpers/mysql/version' + +Gem::Specification.new do |spec| + spec.name = 'opentelemetry-helpers-mysql' + spec.version = OpenTelemetry::Helpers::MySQL::VERSION + spec.authors = ['OpenTelemetry Authors'] + spec.email = ['cncf-opentelemetry-contributors@lists.cncf.io'] + + spec.summary = 'MySQL Instrumentation Helpers for the OpenTelemetry framework' + spec.description = 'MySQL Instrumentation Helpers for the OpenTelemetry framework' + spec.homepage = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib' + spec.license = 'Apache-2.0' + + spec.files = Dir.glob('lib/**/*.rb') + + Dir.glob('*.md') + + ['LICENSE', '.yardopts'] + spec.require_paths = ['lib'] + spec.required_ruby_version = '>= 3.0' + + spec.add_dependency 'opentelemetry-api', '~> 1.0' + spec.add_dependency 'opentelemetry-common', '~> 0.20' + + spec.add_development_dependency 'bundler', '~> 2.4' + spec.add_development_dependency 'minitest', '~> 5.0' + spec.add_development_dependency 'opentelemetry-test-helpers', '~> 0.3' + spec.add_development_dependency 'rake', '~> 13.0' + spec.add_development_dependency 'rubocop', '~> 1.57.2' + spec.add_development_dependency 'rubocop-performance', '~> 1.19.1' + spec.add_development_dependency 'simplecov', '~> 0.22.0' + spec.add_development_dependency 'yard', '~> 0.9' + spec.add_development_dependency 'yard-doctest', '~> 0.1.6' + + if spec.respond_to?(:metadata) + spec.metadata['changelog_uri'] = "https://rubydoc.info/gems/#{spec.name}/#{spec.version}/file/CHANGELOG.md" + spec.metadata['source_code_uri'] = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib/tree/main/helpers/mysql' + spec.metadata['bug_tracker_uri'] = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib/issues' + spec.metadata['documentation_uri'] = "https://rubydoc.info/gems/#{spec.name}/#{spec.version}" + end +end diff --git 
a/helpers/mysql/test/helpers/mysql_test.rb b/helpers/mysql/test/helpers/mysql_test.rb new file mode 100644 index 000000000..e72d1ca86 --- /dev/null +++ b/helpers/mysql/test/helpers/mysql_test.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'test_helper' + +describe OpenTelemetry::Helpers::MySQL do + describe '.database_span_name' do + let(:sql) { 'SELECT * FROM users' } + let(:operation) { 'operation' } + let(:database_name) { 'database_name' } + let(:config) { { span_name: span_name } } + let(:database_span_name) { OpenTelemetry::Helpers::MySQL.database_span_name(sql, operation, database_name, config) } + + describe 'when config[:span_name] is :statement_type' do + let(:span_name) { :statement_type } + + it 'returns the statement type' do + assert_equal(database_span_name, 'select') + end + end + + describe 'when config[:span_name] is :db_name' do + let(:span_name) { :db_name } + + it 'returns database name' do + assert_equal(database_span_name, database_name) + end + end + + describe 'when config[:span_name] is :db_operation_and_name' do + let(:span_name) { :db_operation_and_name } + + it 'returns db operation and name' do + assert_equal(database_span_name, 'operation database_name') + end + end + + describe 'when config[:span_name] does not match a case' do + let(:span_name) { 'something_unexpected' } + + it 'returns mysql' do + assert_equal(database_span_name, 'mysql') + end + end + + describe 'when config[:span_name] is nil' do + let(:span_name) { nil } + + it 'returns mysql' do + assert_equal(database_span_name, 'mysql') + end + end + end + + describe '.db_operation_and_name' do + let(:operation) { 'operation' } + let(:database_name) { 'database_name' } + let(:db_operation_and_name) { OpenTelemetry::Helpers::MySQL.db_operation_and_name(operation, database_name) } + + describe 'when operation and database_name are present' do + it 'returns a combination of the 
operation and database_name' do + assert_equal(db_operation_and_name, 'operation database_name') + end + end + + describe 'when operation is nil' do + let(:operation) { nil } + + it 'returns database_name' do + assert_equal(db_operation_and_name, database_name) + end + end + + describe 'when database_name is nil' do + let(:database_name) { nil } + + it 'returns the operation name' do + assert_equal(db_operation_and_name, operation) + end + end + + describe 'when both database_name and operation are nil' do + let(:database_name) { nil } + let(:operation) { nil } + + it 'returns nil' do + assert_nil(db_operation_and_name) + end + end + end + + describe '.extract_statement_type' do + let(:sql) { 'SELECT * FROM users' } + let(:extract_statement_type) { OpenTelemetry::Helpers::MySQL.extract_statement_type(sql) } + + describe 'when it finds a match' do + it 'returns the query name' do + assert_equal('select', extract_statement_type) + end + end + + describe 'when sql contains invalid byte sequences' do + let(:sql) { "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'" } + + it 'extracts the statement' do + assert_equal('select', extract_statement_type) + end + end + + describe 'when sql contains unknown query statement' do + let(:sql) { 'DESELECT 1' } + + # nil sets the span name to 'mysql' + it 'returns nil' do + assert_nil(extract_statement_type) + end + end + + describe 'when sql contains multiple query statements' do + let(:sql) { 'EXPLAIN SELECT 1' } + + it 'extracts the statement type that begins the query' do + assert_equal('explain', extract_statement_type) + end + end + + describe 'when sql with marginalia-style prepended comments includes a query statement' do + let(:sql) do + "/*action='update',application='TrilogyTest',controller='users'*/ SELECT `users`.* FROM `users` WHERE `users`.`id` = 1 LIMIT 1" + end + + it 'does not capture the query statement within the comment' do + assert_equal('select', extract_statement_type) + end + end 
+ + describe 'when sql is nil' do + let(:sql) { nil } + + it 'returns nil' do + assert_nil(extract_statement_type) + end + end + + describe 'when an error is raised' do + it 'logs a message' do + result = nil + OpenTelemetry::TestHelpers.with_test_logger do |log_stream| + OpenTelemetry::Common::Utilities.stub(:utf8_encode, ->(_) { raise 'boom!' }) do + extract_statement_type + + assert_nil(result) + assert_match(/Error extracting/, log_stream.string) + end + end + end + end + end +end diff --git a/helpers/mysql/test/test_helper.rb b/helpers/mysql/test/test_helper.rb new file mode 100644 index 000000000..0f824f6b8 --- /dev/null +++ b/helpers/mysql/test/test_helper.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'bundler/setup' +Bundler.require(:default, :development, :test) + +require 'opentelemetry' +require 'opentelemetry-helpers-mysql' +require 'minitest/autorun' + +OpenTelemetry.logger = Logger.new($stderr, level: ENV.fetch('OTEL_LOG_LEVEL', 'fatal').to_sym) diff --git a/helpers/sql-obfuscation/.rubocop.yml b/helpers/sql-obfuscation/.rubocop.yml new file mode 100644 index 000000000..4b31975de --- /dev/null +++ b/helpers/sql-obfuscation/.rubocop.yml @@ -0,0 +1,4 @@ +inherit_from: ../../.rubocop.yml + +Gemspec/DevelopmentDependencies: + Enabled: false diff --git a/helpers/sql-obfuscation/.yardopts b/helpers/sql-obfuscation/.yardopts new file mode 100644 index 000000000..d90322845 --- /dev/null +++ b/helpers/sql-obfuscation/.yardopts @@ -0,0 +1,9 @@ +--no-private +--title=OpenTelemetry SQL Obfuscation Instrumentation Helpers +--markup=markdown +--main=README.md +./lib/opentelemetry/helpers/**/*.rb +./lib/opentelemetry/helpers.rb +- +README.md +CHANGELOG.md diff --git a/helpers/sql-obfuscation/CHANGELOG.md b/helpers/sql-obfuscation/CHANGELOG.md new file mode 100644 index 000000000..803cd23c7 --- /dev/null +++ b/helpers/sql-obfuscation/CHANGELOG.md @@ -0,0 +1 @@ +# 
Release History: opentelemetry-helpers-sql-obfuscation diff --git a/helpers/sql-obfuscation/Gemfile b/helpers/sql-obfuscation/Gemfile new file mode 100644 index 000000000..f649e2f64 --- /dev/null +++ b/helpers/sql-obfuscation/Gemfile @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +source 'https://rubygems.org' + +gemspec diff --git a/helpers/sql-obfuscation/LICENSE b/helpers/sql-obfuscation/LICENSE new file mode 100644 index 000000000..1ef7dad2c --- /dev/null +++ b/helpers/sql-obfuscation/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright The OpenTelemetry Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/helpers/sql-obfuscation/README.md b/helpers/sql-obfuscation/README.md new file mode 100644 index 000000000..7165c1aa6 --- /dev/null +++ b/helpers/sql-obfuscation/README.md @@ -0,0 +1,62 @@ +# OpenTelemetry Instrumentation Helpers: SQL Obfuscation + +This Ruby gem contains logic to obfuscate SQL. It's intended for use by gem authors instrumenting SQL adapter libraries, such as mysql2, pg, and trilogy. + +The logic is largely drawn from the [New Relic Ruby agent's SQL Obfuscation Helpers module][new-relic-obfuscation-helpers]. + +## Usage + +Add the gem to your instrumentation's gemspec file: + +```ruby +# opentelemetry-instrumentation-your-gem.gemspec + spec.add_dependency 'opentelemetry-helpers-sql-obfuscation' +``` + +Add the gem to your instrumentation's Gemfile: + +```ruby +# Gemfile + +group :test, :development do + gem 'opentelemetry-helpers-sql-obfuscation', path: '../../helpers/sql-obfuscation' +end +``` + +Make sure the `Instrumentation` class for your gem contains configuration options for: +- `:obfuscation_limit`: the length at which the obfuscated SQL string will be truncated. 
+ Example: `option :obfuscation_limit, default: 2000, validate: :integer` + +If you want to add support for a new adapter, update the following constants to include keys for your adapter: + +- `DIALECT_COMPONENTS` +- `CLEANUP_REGEX` + +You must also add a new constant that calls the `generate_regex` method with your adapter's DIALECT_COMPONENTS that is named like `_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`. + +Check [New Relic's SQL Obfuscation Helpers module][new-relic-obfuscation-helpers] to see if regular expressions for your adapter already exist. + +## Examples + +To obfuscate sql in your library: + +```ruby +OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: config[:obfuscation_limit], adapter: :postgres) +``` + +## How can I get involved? + +The `opentelemetry-helpers-sql-obfuscation` gem source is [on github][repo-github], along with related gems including `opentelemetry-instrumentation-pg` and `opentelemetry-instrumentation-trilogy`. + +The OpenTelemetry Ruby gems are maintained by the OpenTelemetry-Ruby special interest group (SIG). You can get involved by joining us on our [gitter channel][ruby-gitter] or attending our weekly meeting. See the [meeting calendar][community-meetings] for dates and times. For more information on this and other language SIGs, see the OpenTelemetry [community page][ruby-sig]. + +## License + +The `opentelemetry-helpers-sql-obfuscation` gem is distributed under the Apache 2.0 license. See [LICENSE][license-github] for more information. 
+ +[new-relic-obfuscation-helpers]: https://github.com/newrelic/newrelic-ruby-agent/blob/96e7aca22c1c873c0f5fe704a2b3bb19652db68e/lib/new_relic/agent/database/obfuscation_helpers.rb +[repo-github]: https://github.com/open-telemetry/opentelemetry-ruby +[license-github]: https://github.com/open-telemetry/opentelemetry-ruby-contrib/blob/main/LICENSE +[ruby-sig]: https://github.com/open-telemetry/community#ruby-sig +[community-meetings]: https://github.com/open-telemetry/community#community-meetings +[ruby-gitter]: https://gitter.im/open-telemetry/opentelemetry-ruby diff --git a/helpers/sql-obfuscation/Rakefile b/helpers/sql-obfuscation/Rakefile new file mode 100644 index 000000000..1a64ba842 --- /dev/null +++ b/helpers/sql-obfuscation/Rakefile @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'bundler/gem_tasks' +require 'rake/testtask' +require 'yard' +require 'rubocop/rake_task' + +RuboCop::RakeTask.new + +Rake::TestTask.new :test do |t| + t.libs << 'test' + t.libs << 'lib' + t.test_files = FileList['test/**/*_test.rb'] +end + +YARD::Rake::YardocTask.new do |t| + t.stats_options = ['--list-undoc'] +end + +if RUBY_ENGINE == 'truffleruby' + task default: %i[test] +else + task default: %i[test rubocop yard] +end diff --git a/helpers/sql-obfuscation/lib/opentelemetry-helpers-sql-obfuscation.rb b/helpers/sql-obfuscation/lib/opentelemetry-helpers-sql-obfuscation.rb new file mode 100644 index 000000000..a839b7e58 --- /dev/null +++ b/helpers/sql-obfuscation/lib/opentelemetry-helpers-sql-obfuscation.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require_relative 'opentelemetry/helpers' diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers.rb new file mode 100644 index 000000000..671d2b6e1 --- /dev/null +++ 
b/helpers/sql-obfuscation/lib/opentelemetry/helpers.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'opentelemetry/helpers/sql_obfuscation' + +module OpenTelemetry + # The helpers module contains functionality shared across multiple + # instrumentation libraries + module Helpers + end +end diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation.rb new file mode 100644 index 000000000..da0394b42 --- /dev/null +++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'opentelemetry-common' + +module OpenTelemetry + module Helpers + # + # This module contains SQL obfuscation behavior to share with + # instrumentation for specific database adapters. + # This module uses code from: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb + # + # To use this in your instrumentation, the `Instrumentation` class for + # your gem must contain configuration options for: + # * `:db_statement` + # Example: + # `option :db_statement, default: :include, validate: %I[omit include obfuscate]` + # * `:obfuscation_limit` + # Example: + # `option :obfuscation_limit, default: 2000, validate: :integer` + # + # If you want to add support for a new adapter, update the following + # constants to include keys for your adapter: + # * DIALECT_COMPONENTS + # * CLEANUP_REGEX + # You must also add a new constant that uses `generate_regex` with your + # adapter's dialect components that is named like + # `<ADAPTER>_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`. 
+ # + # @api public + module SqlObfuscation + module_function + + # From: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb + COMPONENTS_REGEX_MAP = { + single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/, + double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/, + dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/, + uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/, + numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/, + boolean_literals: /\b(?:true|false|null)\b/i, + hexadecimal_literals: /0x[0-9a-fA-F]+/, + comments: /(?:#|--).*?(?=\r|\n|$)/i, + multi_line_comments: %r{(?:\/\*.*?\*\/)}m, + oracle_quoted_strings: /q'\[.*?(?:\]'|$)|q'\{.*?(?:\}'|$)|q'\<.*?(?:\>'|$)|q'\(.*?(?:\)'|$)/ + }.freeze + + DIALECT_COMPONENTS = { + default: COMPONENTS_REGEX_MAP.keys, + mysql: %i[single_quotes double_quotes numeric_literals boolean_literals + hexadecimal_literals comments multi_line_comments], + postgres: %i[single_quotes dollar_quotes uuids numeric_literals + boolean_literals comments multi_line_comments], + sqlite: %i[single_quotes numeric_literals boolean_literals hexadecimal_literals + comments multi_line_comments], + oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments + multi_line_comments], + cassandra: %i[single_quotes uuids numeric_literals boolean_literals + hexadecimal_literals comments multi_line_comments] + }.freeze + + PLACEHOLDER = '?' + + # We use these to check whether the query contains any quote characters + # after obfuscation. If so, that's a good indication that the original + # query was malformed, and so our obfuscation can't reliably find + # literals. In such a case, we'll replace the entire query with a + # placeholder. 
+ CLEANUP_REGEX = { + default: %r{'|"|\/\*|\*\/}, + mysql: %r{'|"|\/\*|\*\/}, + postgres: %r{'|\/\*|\*\/|\$(?!\?)}, + sqlite: %r{'|\/\*|\*\/}, + cassandra: %r{'|\/\*|\*\/}, + oracle: %r{'|\/\*|\*\/} + }.freeze + + # @api private + def generate_regex(dialect) + components = DIALECT_COMPONENTS[dialect] + Regexp.union(components.map { |component| COMPONENTS_REGEX_MAP[component] }) + end + + DEFAULT_COMPONENTS_REGEX = generate_regex(:default) + MYSQL_COMPONENTS_REGEX = generate_regex(:mysql) + POSTGRES_COMPONENTS_REGEX = generate_regex(:postgres) + SQLITE_COMPONENTS_REGEX = generate_regex(:sqlite) + CASSANDRA_COMPONENTS_REGEX = generate_regex(:cassandra) + ORACLE_COMPONENTS_REGEX = generate_regex(:oracle) + + # This is a SQL obfuscation utility intended for use in database adapter instrumentation. + # + # @param sql [String] The SQL to obfuscate. + # @param obfuscation_limit [optional Integer] The maximum length of an obfuscated sql statement. + # @param adapter [optional Symbol] the type of database adapter calling the method. `:default`, `:mysql` and `:postgres` are supported. + # @return [String] The SQL query string where the values are replaced with "?". When the sql statement exceeds the obfuscation limit + # the first matched pair from the SQL statement will be returned, with an appended truncation message. If truncation is unsuccessful, + # a string describing the error will be returned. 
+ # + # @api public + def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default) + regex = case adapter + when :mysql + MYSQL_COMPONENTS_REGEX + when :postgres + POSTGRES_COMPONENTS_REGEX + else + DEFAULT_COMPONENTS_REGEX + end + + # Original MySQL UTF-8 Encoding Fixes: + # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/160 + # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/345 + sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) + return truncate_statement(sql, regex, obfuscation_limit) if sql.size > obfuscation_limit + + sql = sql.gsub(regex, PLACEHOLDER) + return 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if CLEANUP_REGEX[adapter].match(sql) + + sql + rescue StandardError => e + OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e) + end + + # @api private + def truncate_statement(sql, regex, limit) + first_match_index = sql.index(regex) + truncation_message = "SQL truncated (> #{limit} characters)" + return truncation_message unless first_match_index + + truncated_sql = sql[0...first_match_index] + "#{truncated_sql}...\n#{truncation_message}" + end + end + end +end diff --git a/helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation/version.rb b/helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation/version.rb new file mode 100644 index 000000000..9861b8920 --- /dev/null +++ b/helpers/sql-obfuscation/lib/opentelemetry/helpers/sql_obfuscation/version.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +module OpenTelemetry + module Helpers + module SqlObfuscation + VERSION = '0.1.0' + end + end +end diff --git a/helpers/sql-obfuscation/opentelemetry-helpers-sql-obfuscation.gemspec b/helpers/sql-obfuscation/opentelemetry-helpers-sql-obfuscation.gemspec new file mode 100644 index 000000000..6cf7cd965 --- /dev/null +++ 
b/helpers/sql-obfuscation/opentelemetry-helpers-sql-obfuscation.gemspec @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +lib = File.expand_path('lib', __dir__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'opentelemetry/helpers/sql_obfuscation/version' + +Gem::Specification.new do |spec| + spec.name = 'opentelemetry-helpers-sql-obfuscation' + spec.version = OpenTelemetry::Helpers::SqlObfuscation::VERSION + spec.authors = ['OpenTelemetry Authors'] + spec.email = ['cncf-opentelemetry-contributors@lists.cncf.io'] + + spec.summary = 'SQL Obfuscation Instrumentation Helpers for the OpenTelemetry framework' + spec.description = 'SQL Obfuscation Instrumentation Helpers for the OpenTelemetry framework' + spec.homepage = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib' + spec.license = 'Apache-2.0' + + spec.files = Dir.glob('lib/**/*.rb') + + Dir.glob('*.md') + + ['LICENSE', '.yardopts'] + spec.require_paths = ['lib'] + spec.required_ruby_version = '>= 3.0' + + spec.add_dependency 'opentelemetry-common', '~> 0.20' + + spec.add_development_dependency 'bundler', '~> 2.4' + spec.add_development_dependency 'minitest', '~> 5.0' + spec.add_development_dependency 'opentelemetry-test-helpers', '~> 0.3' + spec.add_development_dependency 'rake', '~> 13.0' + spec.add_development_dependency 'rubocop', '~> 1.57.2' + spec.add_development_dependency 'rubocop-performance', '~> 1.19.1' + spec.add_development_dependency 'yard', '~> 0.9' + spec.add_development_dependency 'yard-doctest', '~> 0.1.6' + + if spec.respond_to?(:metadata) + spec.metadata['changelog_uri'] = "https://rubydoc.info/gems/#{spec.name}/#{spec.version}/file/CHANGELOG.md" + spec.metadata['source_code_uri'] = 'https://github.com/open-telemetry/opentelemetry-ruby-contrib/tree/main/helpers/sql-obfuscation' + spec.metadata['bug_tracker_uri'] = 
'https://github.com/open-telemetry/opentelemetry-ruby-contrib/issues' + spec.metadata['documentation_uri'] = "https://rubydoc.info/gems/#{spec.name}/#{spec.version}" + end +end diff --git a/helpers/sql-obfuscation/test/fixtures/sql_obfuscation.json b/helpers/sql-obfuscation/test/fixtures/sql_obfuscation.json new file mode 100644 index 000000000..9c75b8f16 --- /dev/null +++ b/helpers/sql-obfuscation/test/fixtures/sql_obfuscation.json @@ -0,0 +1,685 @@ +[ + { + "name": "back_quoted_identifiers.mysql", + "obfuscated": [ + "SELECT `t001`.`c2` FROM `t001` WHERE `t001`.`c2` = ? AND c3=? LIMIT ?" + ], + "dialects": [ + "mysql" + ], + "sql": "SELECT `t001`.`c2` FROM `t001` WHERE `t001`.`c2` = 'value' AND c3=\"othervalue\" LIMIT ?" + }, + { + "name": "comment_delimiters_in_double_quoted_strings", + "obfuscated": [ + "SELECT * FROM t WHERE foo=? AND baz=?" + ], + "dialects": [ + "mysql" + ], + "sql": "SELECT * FROM t WHERE foo=\"bar/*\" AND baz=\"whatever */qux\"" + }, + { + "name": "comment_delimiters_in_single_quoted_strings", + "obfuscated": [ + "SELECT * FROM t WHERE foo=? AND baz=?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM t WHERE foo='bar/*' AND baz='whatever */qux'" + }, + { + "name": "double_quoted_identifiers.postgres", + "obfuscated": [ + "SELECT \"t001\".\"c2\" FROM \"t001\" WHERE \"t001\".\"c2\" = ? AND c3=? LIMIT ?" + ], + "dialects": [ + "postgres" + ], + "sql": "SELECT \"t001\".\"c2\" FROM \"t001\" WHERE \"t001\".\"c2\" = 'value' AND c3=1234 LIMIT 1" + }, + { + "name": "end_of_line_comment_in_double_quoted_string", + "obfuscated": [ + "SELECT * FROM t WHERE foo=? AND\n baz=?" + ], + "dialects": [ + "mysql" + ], + "sql": "SELECT * FROM t WHERE foo=\"bar--\" AND\n baz=\"qux--\"" + }, + { + "name": "end_of_line_comment_in_single_quoted_string", + "obfuscated": [ + "SELECT * FROM t WHERE foo=? AND\n baz=?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM t WHERE foo='bar--' AND\n baz='qux--'" + }, + { + "name": "end_of_query_comment_cstyle", + "obfuscated": [ + "SELECT * FROM foo WHERE bar=? ?", + "SELECT * FROM foo WHERE bar=? " + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM foo WHERE bar='baz' /* Hide Me */" + }, + { + "name": "end_of_query_comment_doubledash", + "obfuscated": [ + "SELECT * FROM foobar WHERE password=?\n?", + "SELECT * FROM foobar WHERE password=?\n" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM foobar WHERE password='secret2'\n-- No peeking!" + }, + { + "name": "end_of_query_comment_hash", + "obfuscated": [ + "SELECT foo, bar FROM baz WHERE password=? ?", + "SELECT foo, bar FROM baz WHERE password=? " + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT foo, bar FROM baz WHERE password='secret2' # Secret" + }, + { + "name": "escape_string_constants.postgres", + "sql": "SELECT \"col1\", \"col2\" from \"table\" WHERE \"col3\"=E'foo\\'bar\\\\baz' AND country=e'foo\\'bar\\\\baz'", + "obfuscated": [ + "SELECT \"col1\", \"col2\" from \"table\" WHERE \"col3\"=E?", + "SELECT \"col1\", \"col2\" from \"table\" WHERE \"col3\"=E? AND country=e?" + ], + "dialects": [ + "postgres" + ], + "comments": [ + "PostgreSQL supports an alternate string quoting mode where backslash escape", + "sequences are interpreted.", + "See: http://www.postgresql.org/docs/9.3/static/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE" + ] + }, + { + "name": "multiple_literal_types.mysql", + "obfuscated": [ + "INSERT INTO `X` values(?,?, ? 
, ?, ?)" + ], + "dialects": [ + "mysql" + ], + "sql": "INSERT INTO `X` values(\"test\",0, 1 , 2, 'test')" + }, + { + "name": "numbers_in_identifiers", + "obfuscated": [ + "SELECT c11.col1, c22.col2 FROM table c11, table c22 WHERE value=?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT c11.col1, c22.col2 FROM table c11, table c22 WHERE value='nothing'" + }, + { + "name": "numeric_literals", + "sql": "INSERT INTO X VALUES(1, 23456, 123.456, 99+100)", + "obfuscated": [ + "INSERT INTO X VALUES(?, ?, ?, ?+?)", + "INSERT INTO X VALUES(?, ?, ?.?, ?+?)" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "string_double_quoted.mysql", + "obfuscated": [ + "SELECT * FROM table WHERE name=? AND value=?" + ], + "dialects": [ + "mysql" + ], + "sql": "SELECT * FROM table WHERE name=\"foo\" AND value=\"don't\"" + }, + { + "name": "string_single_quoted", + "obfuscated": [ + "SELECT * FROM table WHERE name=? AND value = ?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM table WHERE name='foo' AND value = 'bar'" + }, + { + "name": "string_with_backslash_and_twin_single_quotes", + "obfuscated": [ + "SELECT * FROM table WHERE col=?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM table WHERE col='foo\\''bar'", + "comments": [ + "If backslashes are being ignored in single-quoted strings", + "(standard_conforming_strings=on in PostgreSQL, or NO_BACKSLASH_ESCAPES is on", + "in MySQL), then this is valid SQL." + ] + }, + { + "name": "string_with_embedded_double_quote", + "obfuscated": [ + "SELECT * FROM table WHERE col1=? AND col2=?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM table WHERE col1='foo\"bar' AND col2='what\"ever'" + }, + { + "name": "string_with_embedded_newline", + "obfuscated": [ + "select * from accounts where accounts.name != ? order by accounts.name" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "select * from accounts where accounts.name != 'dude \n newline' order by accounts.name" + }, + { + "name": "string_with_embedded_single_quote.mysql", + "obfuscated": [ + "SELECT * FROM table WHERE col1=? AND col2=?" + ], + "dialects": [ + "mysql" + ], + "sql": "SELECT * FROM table WHERE col1=\"don't\" AND col2=\"won't\"" + }, + { + "name": "string_with_escaped_quotes.mysql", + "sql": "INSERT INTO X values('', 'jim''s ssn',0, 1 , 'jim''s son''s son', \"\"\"jim''s\"\" hat\", \"\\\"jim''s secret\\\"\")", + "obfuscated": [ + "INSERT INTO X values(?, ?,?, ? , ?, ?, ?", + "INSERT INTO X values(?, ?,?, ? , ?, ?, ?)" + ], + "dialects": [ + "mysql" + ] + }, + { + "name": "string_with_trailing_backslash", + "sql": "SELECT * FROM table WHERE name='foo\\' AND color='blue'", + "obfuscated": [ + "SELECT * FROM table WHERE name=?", + "SELECT * FROM table WHERE name=? AND color=?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "comments": [ + "If backslashes are being ignored in single-quoted strings", + "(standard_conforming_strings=on in PostgreSQL, or NO_BACKSLASH_ESCAPES is on", + "in MySQL), then this is valid SQL." + ] + }, + { + "name": "string_with_trailing_escaped_backslash.mysql", + "obfuscated": [ + "SELECT * FROM table WHERE foo=?" + ], + "dialects": [ + "mysql" + ], + "sql": "SELECT * FROM table WHERE foo=\"this string ends with a backslash\\\\\"" + }, + { + "name": "string_with_trailing_escaped_backslash_single_quoted", + "obfuscated": [ + "SELECT * FROM table WHERE foo=?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "SELECT * FROM table WHERE foo='this string ends with a backslash\\\\'" + }, + { + "name": "string_with_trailing_escaped_quote", + "sql": "SELECT * FROM table WHERE name='foo\\'' AND color='blue'", + "obfuscated": [ + "SELECT * FROM table WHERE name=?", + "SELECT * FROM table WHERE name=? AND color=?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "string_with_twin_single_quotes", + "obfuscated": [ + "INSERT INTO X values(?, ?,?, ? , ?)" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "sql": "INSERT INTO X values('', 'a''b c',0, 1 , 'd''e f''s h')" + }, + { + "name": "end_of_line_comments_with_quotes", + "sql": "SELECT * FROM t WHERE -- '\n bar='baz' -- '", + "obfuscated": [ + "SELECT * FROM t WHERE ?\n bar=? ?", + "SELECT * FROM t WHERE ?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "mixed_comments_and_quotes", + "sql": "SELECT * FROM t WHERE /* ' */ \n bar='baz' -- '", + "obfuscated": [ + "SELECT * FROM t WHERE ? \n bar=? ?", + "SELECT * FROM t WHERE ?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "mixed_quotes_comments_and_newlines", + "sql": "SELECT * FROM t WHERE -- '\n /* ' */ c2='xxx' /* ' */\n c='x\n xx' -- '", + "obfuscated": [ + "SELECT * FROM t WHERE ?\n ? c2=? ?\n c=? ?", + "SELECT * FROM t WHERE ?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "mixed_quotes_end_of_line_comments", + "sql": "SELECT * FROM t WHERE -- '\n c='x\n xx' -- '", + "obfuscated": [ + "SELECT * FROM t WHERE ?\n c=? ?", + "SELECT * FROM t WHERE ?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "quote_delimiters_in_comments", + "sql": "SELECT * FROM foo WHERE col='value1' AND /* don't */ col2='value1' /* won't */", + "obfuscated": [ + "SELECT * FROM foo WHERE col=? AND ? col2=? ?", + "SELECT * FROM foo WHERE col=? AND ?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "malformed/unterminated_double_quoted_string.mysql", + "sql": "SELECT * FROM table WHERE foo='bar' AND baz=\"nothing to see here'", + "dialects": [ + "mysql" + ], + "obfuscated": [ + "?" + ], + "malformed": true + }, + { + "name": "malformed/unterminated_single_quoted_string", + "sql": "SELECT * FROM table WHERE foo='bar' AND baz='nothing to see here", + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ], + "obfuscated": [ + "?" + ], + "malformed": true + }, + { + "name": "dollar_quotes", + "sql": "SELECT * FROM \"foo\" WHERE \"foo\" = $a$dollar quotes can be $b$nested$b$$a$ and bar = 'baz'", + "obfuscated": [ + "SELECT * FROM \"foo\" WHERE \"foo\" = ? and bar = ?" + ], + "dialects": [ + "postgres" + ] + }, + { + "name": "variable_substitution_not_mistaken_for_dollar_quotes", + "sql": "INSERT INTO \"foo\" (\"bar\", \"baz\", \"qux\") VALUES ($1, $2, $3) RETURNING \"id\"", + "obfuscated": [ + "INSERT INTO \"foo\" (\"bar\", \"baz\", \"qux\") VALUES ($?, $?, $?) RETURNING \"id\"" + ], + "dialects": [ + "postgres" + ] + }, + { + "name": "non_quote_escape", + "sql": "select * from foo where bar = 'some\\tthing' and baz = 10", + "obfuscated": [ + "select * from foo where bar = ? and baz = ?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "end_of_string_backslash_and_line_comment_with_quite", + "sql": "select * from users where user = 'user1\\' password = 'secret 2' -- ->don't count this quote", + "obfuscated": [ + "select * from users where user = ?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "oracle_bracket_quote", + "sql": "select * from foo where bar=q'[baz's]' and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "oracle" + ] + }, + { + "name": "oracle_brace_quote", + "sql": "select * from foo where bar=q'{baz's}' and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "oracle" + ] + }, + { + "name": "oracle_angle_quote", + "sql": "select * from foo where bar=q'<baz's>' and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "oracle" + ] + }, + { + "name": "oracle_paren_quote", + "sql": "select * from foo where bar=q'(baz's)' and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "oracle" + ] + }, + { + "name": "cassandra_blobs", + "sql": "select * from foo where bar=0xabcdef123 and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "cassandra", + "sqlite" + ] + }, + { + "name": "hex_literals", + "sql": "select * from foo where bar=0x2F and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "mysql", + "cassandra", + "sqlite" + ] + }, + { + "name": "exponential_literals", + "sql": "select * from foo where bar=1.234e-5 and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "negative_integer_literals", + "sql": "select * from foo where bar=-1.234e-5 and x=-5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra", + "sqlite" + ] + }, + { + "name": "uuid", + "sql": "select * from foo where bar=01234567-89ab-cdef-0123-456789abcdef and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "postgres", + "cassandra" + ] + }, + { + "name": "uuid_with_braces", + "sql": "select * from foo where bar={01234567-89ab-cdef-0123-456789abcdef} and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "postgres" + ] + }, + { + "name": "uuid_no_dashes", + "sql": "select * from foo where bar=0123456789abcdef0123456789abcdef and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "postgres" + ] + }, + { + "name": "uuid_random_dashes", + "sql": "select * from foo where bar={012-345678-9abc-def012345678-9abcdef} and x=5", + "obfuscated": [ + "select * from foo where bar=? and x=?" + ], + "dialects": [ + "postgres" + ] + }, + { + "name": "booleans", + "sql": "select * from truestory where bar=true and x=FALSE", + "obfuscated": [ + "select * from truestory where bar=? and x=?" 
+ ], + "dialects": [ + "mysql", + "postgres", + "cassandra", + "sqlite" + ] + }, + { + "name": "in_clause_digits", + "sql": "select * from foo where bar IN (123, 456, 789)", + "obfuscated": [ + "select * from foo where bar IN (?, ?, ?)" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra" + ] + }, + { + "name": "in_clause_strings", + "sql": "select * from foo where bar IN ('asdf', 'fdsa')", + "obfuscated": [ + "select * from foo where bar IN (?, ?)" + ], + "dialects": [ + "mysql", + "postgres", + "oracle", + "cassandra" + ] + }, + { + "name": "prepended_comments_with_quotes.postgres", + "sql": "/*application:Demo,controller:posts,action:update*/ UPDATE \"posts\" SET \"updated_at\" = '2023-11-01 19:02:34.795909' WHERE \"posts\".\"id\" = 3", + "obfuscated": [ + "? UPDATE \"posts\" SET \"updated_at\" = ? WHERE \"posts\".\"id\" = ?" + ], + "dialects": [ + "postgres" + ] + }, + { + "name": "prepended_comments_with_quotes.mysql", + "sql": "/*action='show',application='TrilogyTest',controller='users'*/ SELECT `users`.* FROM `users` WHERE `users`.`id` = 1 LIMIT 1", + "obfuscated": [ + "? SELECT `users`.* FROM `users` WHERE `users`.`id` = ? LIMIT ?" + ], + "dialects": [ + "mysql" + ] + }, + { + "name": "prepended_multiline_comments_with_quotes.mysql", + "sql": "/*action='show',\napplication='TrilogyTest',controller='users'*/\nSELECT `users`.*\nFROM `users`\nWHERE `users`.`id` = 1 LIMIT 1", + "obfuscated": [ + "?\nSELECT `users`.*\nFROM `users`\nWHERE `users`.`id` = ? LIMIT ?" 
+ ], + "dialects": [ + "mysql" + ] + } +] diff --git a/helpers/sql-obfuscation/test/helpers/sql_obfuscation_test.rb b/helpers/sql-obfuscation/test/helpers/sql_obfuscation_test.rb new file mode 100644 index 000000000..650ba63a7 --- /dev/null +++ b/helpers/sql-obfuscation/test/helpers/sql_obfuscation_test.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +# This file is distributed under New Relic's license terms. +# See https://github.com/newrelic/newrelic-ruby-agent/blob/main/LICENSE for complete details. + +require_relative '../test_helper' + +class SqlObfuscationTest < Minitest::Test + def test_named_arg_defaults_obfuscates + sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com'" + expected = 'SELECT * from users where users.id = ? and users.email = ?' + result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql) + + assert_equal(expected, result) + end + + def test_obfuscation_limit_truncates_query_after_first_match + sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com'" + expected = "SELECT * from users where users.id = ...\nSQL truncated (> 42 characters)" + result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: 42) + + assert_equal(expected, result) + end + + def test_obfuscation_limit_truncates_when_query_not_encoded_with_utf8 + sql = "SELECT * from 😄 where users.id = 1 and users.😄 = 'test@test.com'" + expected = "SELECT * from where users.id = ...\nSQL truncated (> 42 characters)" + result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: 42) + + assert_equal(expected, result) + end + + def test_non_utf_8_encoded_string_obfuscates_with_mysql + sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'" + expected = 'SELECT * from users where users.id = ? and users.email = ?' 
+ result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :mysql) + + assert_equal(expected, result) + end + + def test_non_utf_8_encoded_string_obfuscates_with_postgres + sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'" + expected = 'SELECT * from users where users.id = ? and users.email = ?' + result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :postgres) + + assert_equal(expected, result) + end + + def test_statement_with_emoji_encodes_utf_8_and_obfuscates + sql = "SELECT * from users where users.id = 1 and users.email = 'test@😄.com'" + expected = 'SELECT * from users where users.id = ? and users.email = ?' + result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql) + + assert_equal(expected, result) + end + + # The following tests and their corresponding fixture are based on code from + # the New Relic Ruby agent. + # source: https://github.com/newrelic/newrelic-ruby-agent/blob/cb72bb5fab3fb318613421c86863a5ccdd2ff250/test/new_relic/agent/database/sql_obfuscation_test.rb + + FAILED_TO_OBFUSCATE_MESSAGE = 'Failed to obfuscate SQL query - quote characters remained after obfuscation' + + def build_failure_message(statement, dialect, acceptable_outputs, actual_output) + msg = +"Failed to obfuscate #{dialect} query correctly.\n" + msg << "Input: #{statement}\n" + if acceptable_outputs.size == 1 + msg << "Expected: #{acceptable_outputs.first}\n" + else + msg << "Acceptable outputs:\n" + acceptable_outputs.each do |output| + msg << " #{output}\n" + end + end + msg << "Actual: #{actual_output}\n" + msg + end + + def self.load_fixture + data = File.read("#{Dir.pwd}/test/fixtures/sql_obfuscation.json") + JSON.parse(data) + end + + load_fixture.each do |test_case| + name = test_case['name'] + query = test_case['sql'] + acceptable_outputs = test_case['obfuscated'] + dialects = test_case['dialects'] + + # If the entire query is obfuscated because it's malformed, we use a + # placeholder 
message instead of just '?', so add that to the acceptable + # outputs. + acceptable_outputs << FAILED_TO_OBFUSCATE_MESSAGE if test_case['malformed'] + + dialects.each do |dialect| + define_method("test_sql_obfuscation_#{name}_#{dialect}") do + actual_obfuscated = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(query, adapter: dialect.to_sym) + message = build_failure_message(query, dialect, acceptable_outputs, actual_obfuscated) + + assert_includes(acceptable_outputs, actual_obfuscated, message) + end + end + end + ## End New Relic tests +end diff --git a/helpers/sql-obfuscation/test/test_helper.rb b/helpers/sql-obfuscation/test/test_helper.rb new file mode 100644 index 000000000..6e84b456c --- /dev/null +++ b/helpers/sql-obfuscation/test/test_helper.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'bundler/setup' +Bundler.require(:default, :development, :test) + +require 'minitest/autorun' +require 'opentelemetry-helpers-sql-obfuscation' diff --git a/instrumentation/all/Gemfile b/instrumentation/all/Gemfile index 5cfaa5c0b..554672bec 100644 --- a/instrumentation/all/Gemfile +++ b/instrumentation/all/Gemfile @@ -9,6 +9,12 @@ source 'https://rubygems.org' gemspec group :test do + Dir.entries('../../helpers') + .select { |entry| File.directory?(File.join('../../helpers', entry)) } + .reject { |entry| %w[. ..].include?(entry) } # rubocop:disable Performance/CollectionLiteralInLoop + .sort + .each { |dir| gem "opentelemetry-helpers-#{dir}", path: "../../helpers/#{dir}" } + Dir.entries('../') .select { |entry| File.directory?(File.join('../', entry)) } .reject { |entry| %w[. .. 
all].include?(entry) } # rubocop:disable Performance/CollectionLiteralInLoop diff --git a/instrumentation/mysql2/Gemfile b/instrumentation/mysql2/Gemfile index b2f83af8d..6ba31761a 100644 --- a/instrumentation/mysql2/Gemfile +++ b/instrumentation/mysql2/Gemfile @@ -10,4 +10,6 @@ gemspec group :test, :development do gem 'opentelemetry-instrumentation-base', path: '../base' + gem 'opentelemetry-helpers-mysql', path: '../../helpers/mysql' + gem 'opentelemetry-helpers-sql-obfuscation', path: '../../helpers/sql-obfuscation' end diff --git a/instrumentation/mysql2/example/mysql2.rb b/instrumentation/mysql2/example/mysql2.rb index 437cec6dc..294e9575e 100644 --- a/instrumentation/mysql2/example/mysql2.rb +++ b/instrumentation/mysql2/example/mysql2.rb @@ -18,4 +18,6 @@ password: ENV.fetch('TEST_MYSQL_PASSWORD') { 'root' } ) -client.query("SELECT * FROM users WHERE group='x'") +client.query("SELECT * from information_schema.INNODB_TABLES; /**Dé**/").each do |row| + puts row +end diff --git a/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2.rb b/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2.rb index bb2b5fac4..186523915 100644 --- a/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2.rb +++ b/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2.rb @@ -20,18 +20,18 @@ module Mysql2 # Returns the attributes hash representing the Mysql2 context found # in the optional context or the current context if none is provided. # - # @param [optional Context] context The context to lookup the current + # @param context [optional Context] The context to lookup the current # attributes hash. Defaults to Context.current def attributes(context = nil) context ||= Context.current context.value(CURRENT_ATTRIBUTES_KEY) || {} end - # Returns a context containing the merged attributes hash, derived from the - # optional parent context, or the current context if one was not provided. 
- # - # @param [optional Context] context The context to use as the parent for + # @param attributes_hash [Hash] The attributes to add to the context + # @param parent_context [optional Context] The context to use as the parent for # the returned context + # @return A context containing the merged attributes hash, derived from the + # optional parent context, or the current context if one was not provided. def context_with_attributes(attributes_hash, parent_context: Context.current) attributes_hash = attributes(parent_context).merge(attributes_hash) parent_context.set_value(CURRENT_ATTRIBUTES_KEY, attributes_hash) diff --git a/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2/patches/client.rb b/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2/patches/client.rb index 0e4025d5b..2f58fac97 100644 --- a/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2/patches/client.rb +++ b/instrumentation/mysql2/lib/opentelemetry/instrumentation/mysql2/patches/client.rb @@ -9,58 +9,27 @@ module Instrumentation module Mysql2 module Patches # Module to prepend to Mysql2::Client for instrumentation - module Client # rubocop:disable Metrics/ModuleLength - QUERY_NAMES = [ - 'set names', - 'select', - 'insert', - 'update', - 'delete', - 'begin', - 'commit', - 'rollback', - 'savepoint', - 'release savepoint', - 'explain', - 'drop database', - 'drop table', - 'create database', - 'create table' - ].freeze - - QUERY_NAME_RE = Regexp.new("^(#{QUERY_NAMES.join('|')})", Regexp::IGNORECASE) - - # From: https://github.com/newrelic/newrelic-ruby-agent/blob/0235b288d85b8bc795bdc1a24621dd9f84cfef45/lib/new_relic/agent/database/obfuscation_helpers.rb#L9-L34 - COMPONENTS_REGEX_MAP = { - single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/, - double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/, - numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/, - boolean_literals: /\b(?:true|false|null)\b/i, - hexadecimal_literals: /0x[0-9a-fA-F]+/, - comments: 
/(?:#|--).*?(?=\r|\n|$)/i, - multi_line_comments: %r{\/\*(?:[^\/]|\/[^*])*?(?:\*\/|\/\*.*)} - }.freeze - - MYSQL_COMPONENTS = %i[ - single_quotes - double_quotes - numeric_literals - boolean_literals - hexadecimal_literals - comments - multi_line_comments - ].freeze - + module Client def query(sql, options = {}) attributes = client_attributes case config[:db_statement] when :include attributes[SemanticConventions::Trace::DB_STATEMENT] = sql when :obfuscate - attributes[SemanticConventions::Trace::DB_STATEMENT] = obfuscate_sql(sql) + attributes[SemanticConventions::Trace::DB_STATEMENT] = + OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql( + sql, obfuscation_limit: config[:obfuscation_limit], adapter: :mysql + ) end tracer.in_span( - database_span_name(sql), + OpenTelemetry::Helpers::MySQL.database_span_name( + sql, + OpenTelemetry::Instrumentation::Mysql2.attributes[ + SemanticConventions::Trace::DB_OPERATION + ], + database_name, + config + ), attributes: attributes.merge!(OpenTelemetry::Instrumentation::Mysql2.attributes), kind: :client ) do @@ -70,57 +39,6 @@ def query(sql, options = {}) private - def obfuscate_sql(sql) - if sql.size > config[:obfuscation_limit] - first_match_index = sql.index(generated_mysql_regex) - truncation_message = "SQL truncated (> #{config[:obfuscation_limit]} characters)" - return truncation_message unless first_match_index - - truncated_sql = sql[..first_match_index - 1] - "#{truncated_sql}...\n#{truncation_message}" - else - obfuscated = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) - obfuscated = obfuscated.gsub(generated_mysql_regex, '?') - obfuscated = 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if detect_unmatched_pairs(obfuscated) - obfuscated - end - rescue StandardError => e - OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e) - 'OpenTelemetry error: failed to obfuscate sql' - end - - def generated_mysql_regex - @generated_mysql_regex ||= 
Regexp.union(MYSQL_COMPONENTS.map { |component| COMPONENTS_REGEX_MAP[component] }) - end - - def detect_unmatched_pairs(obfuscated) - # We use this to check whether the query contains any quote characters - # after obfuscation. If so, that's a good indication that the original - # query was malformed, and so our obfuscation can't reliably find - # literals. In such a case, we'll replace the entire query with a - # placeholder. - %r{'|"|\/\*|\*\/}.match(obfuscated) - end - - def database_span_name(sql) - case config[:span_name] - when :statement_type - extract_statement_type(sql) - when :db_name - database_name - when :db_operation_and_name - op = OpenTelemetry::Instrumentation::Mysql2.attributes[SemanticConventions::Trace::DB_OPERATION] - name = database_name - if op && name - "#{op} #{name}" - elsif op - op - elsif name - name - end - end || 'mysql' - end - def database_name # https://github.com/brianmario/mysql2/blob/ca08712c6c8ea672df658bb25b931fea22555f27/lib/mysql2/client.rb#L78 (query_options[:database] || query_options[:dbname] || query_options[:db])&.to_s @@ -150,13 +68,6 @@ def tracer def config Mysql2::Instrumentation.instance.config end - - def extract_statement_type(sql) - QUERY_NAME_RE.match(sql) { |match| match[1].downcase } unless sql.nil? 
- rescue StandardError => e - OpenTelemetry.logger.debug("Error extracting sql statement type: #{e.message}") - nil - end end end end diff --git a/instrumentation/mysql2/opentelemetry-instrumentation-mysql2.gemspec b/instrumentation/mysql2/opentelemetry-instrumentation-mysql2.gemspec index 7261bdcfe..942980527 100644 --- a/instrumentation/mysql2/opentelemetry-instrumentation-mysql2.gemspec +++ b/instrumentation/mysql2/opentelemetry-instrumentation-mysql2.gemspec @@ -26,6 +26,8 @@ Gem::Specification.new do |spec| spec.required_ruby_version = '>= 3.0' spec.add_dependency 'opentelemetry-api', '~> 1.0' + spec.add_dependency 'opentelemetry-helpers-mysql', '~> 0.1.0' + spec.add_dependency 'opentelemetry-helpers-sql-obfuscation', '~> 0.1.0' spec.add_dependency 'opentelemetry-instrumentation-base', '~> 0.22.1' spec.add_development_dependency 'appraisal', '~> 2.5' diff --git a/instrumentation/mysql2/test/opentelemetry/instrumentation/mysql2/instrumentation_test.rb b/instrumentation/mysql2/test/opentelemetry/instrumentation/mysql2/instrumentation_test.rb index 6eb1ada72..8a0b5a948 100644 --- a/instrumentation/mysql2/test/opentelemetry/instrumentation/mysql2/instrumentation_test.rb +++ b/instrumentation/mysql2/test/opentelemetry/instrumentation/mysql2/instrumentation_test.rb @@ -182,7 +182,7 @@ client.query(sql) end.must_raise Mysql2::Error - _(span.name).must_equal 'mysql' + _(span.name).must_equal 'select' _(span.attributes['db.statement']).must_equal obfuscated_sql end diff --git a/instrumentation/pg/Gemfile b/instrumentation/pg/Gemfile index 2baf57ac4..6c8072b49 100644 --- a/instrumentation/pg/Gemfile +++ b/instrumentation/pg/Gemfile @@ -10,4 +10,5 @@ gemspec group :test do gem 'opentelemetry-instrumentation-base', path: '../base' + gem 'opentelemetry-helpers-sql-obfuscation', path: '../../helpers/sql-obfuscation' end diff --git a/instrumentation/pg/lib/opentelemetry/instrumentation/pg/constants.rb b/instrumentation/pg/lib/opentelemetry/instrumentation/pg/constants.rb 
index 6a324f148..73d9c8710 100644 --- a/instrumentation/pg/lib/opentelemetry/instrumentation/pg/constants.rb +++ b/instrumentation/pg/lib/opentelemetry/instrumentation/pg/constants.rb @@ -65,29 +65,6 @@ module Constants VALUES ].freeze - # From: https://github.com/newrelic/newrelic-ruby-agent/blob/9787095d4b5b2d8fcaf2fdbd964ed07c731a8b6b/lib/new_relic/agent/database/obfuscation_helpers.rb#L9-L34 - COMPONENTS_REGEX_MAP = { - single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/, - dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/, - uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/, - numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/, - boolean_literals: /\b(?:true|false|null)\b/i, - comments: /(?:#|--).*?(?=\r|\n|$)/i, - multi_line_comments: %r{\/\*(?:[^\/]|\/[^*])*?(?:\*\/|\/\*.*)} - }.freeze - - POSTGRES_COMPONENTS = %i[ - single_quotes - dollar_quotes - uuids - numeric_literals - boolean_literals - comments - multi_line_comments - ].freeze - - UNMATCHED_PAIRS_REGEX = %r{'|\/\*|\*\/|\$(?!\?)} - # These are all alike in that they will have a SQL statement as the first parameter. # That statement may possibly be parameterized, but we can still use it - the # obfuscation code will just transform $1 -> $? in that case (which is fine enough). 
diff --git a/instrumentation/pg/lib/opentelemetry/instrumentation/pg/patches/connection.rb b/instrumentation/pg/lib/opentelemetry/instrumentation/pg/patches/connection.rb index 75f000f14..bc3398c6f 100644 --- a/instrumentation/pg/lib/opentelemetry/instrumentation/pg/patches/connection.rb +++ b/instrumentation/pg/lib/opentelemetry/instrumentation/pg/patches/connection.rb @@ -50,6 +50,16 @@ module Connection # rubocop:disable Metrics/ModuleLength private + def obfuscate_sql(sql) + return sql unless config[:db_statement] == :obfuscate + + OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql( + sql, + obfuscation_limit: config[:obfuscation_limit], + adapter: :postgres + ) + end + def tracer PG::Instrumentation.instance.tracer end @@ -112,34 +122,6 @@ def validated_operation(operation) operation if PG::Constants::SQL_COMMANDS.include?(operation) end - def obfuscate_sql(sql) - return sql unless config[:db_statement] == :obfuscate - - if sql.size > config[:obfuscation_limit] - first_match_index = sql.index(generated_postgres_regex) - truncation_message = "SQL truncated (> #{config[:obfuscation_limit]} characters)" - return truncation_message unless first_match_index - - truncated_sql = sql[..first_match_index - 1] - return "#{truncated_sql}...\n#{truncation_message}" - end - - # From: - # https://github.com/newrelic/newrelic-ruby-agent/blob/9787095d4b5b2d8fcaf2fdbd964ed07c731a8b6b/lib/new_relic/agent/database/obfuscator.rb - # https://github.com/newrelic/newrelic-ruby-agent/blob/9787095d4b5b2d8fcaf2fdbd964ed07c731a8b6b/lib/new_relic/agent/database/obfuscation_helpers.rb - obfuscated = sql.gsub(generated_postgres_regex, '?') - obfuscated = 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if PG::Constants::UNMATCHED_PAIRS_REGEX.match(obfuscated) - - obfuscated - rescue StandardError => e - OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e) - 'OpenTelemetry error: failed to obfuscate sql' - end - - def 
generated_postgres_regex - @generated_postgres_regex ||= Regexp.union(PG::Constants::POSTGRES_COMPONENTS.map { |component| PG::Constants::COMPONENTS_REGEX_MAP[component] }) - end - def client_attributes attributes = { 'db.system' => 'postgresql', diff --git a/instrumentation/pg/opentelemetry-instrumentation-pg.gemspec b/instrumentation/pg/opentelemetry-instrumentation-pg.gemspec index 324fd5aaf..83bfc2f13 100644 --- a/instrumentation/pg/opentelemetry-instrumentation-pg.gemspec +++ b/instrumentation/pg/opentelemetry-instrumentation-pg.gemspec @@ -26,6 +26,7 @@ Gem::Specification.new do |spec| spec.required_ruby_version = '>= 3.0' spec.add_dependency 'opentelemetry-api', '~> 1.0' + spec.add_dependency 'opentelemetry-helpers-sql-obfuscation', '~> 0.1.0' spec.add_dependency 'opentelemetry-instrumentation-base', '~> 0.22.1' spec.add_development_dependency 'activerecord' diff --git a/instrumentation/que/Gemfile b/instrumentation/que/Gemfile index b1ea20ba3..a1fad5525 100644 --- a/instrumentation/que/Gemfile +++ b/instrumentation/que/Gemfile @@ -9,7 +9,8 @@ source 'https://rubygems.org' gemspec group :test do + gem 'opentelemetry-helpers-sql-obfuscation', path: '../../helpers/sql-obfuscation' gem 'opentelemetry-instrumentation-base', path: '../base' - gem 'opentelemetry-instrumentation-pg', path: '../../instrumentation/pg' + gem 'opentelemetry-instrumentation-pg', path: '../pg' gem 'appraisal' end diff --git a/instrumentation/trilogy/Gemfile b/instrumentation/trilogy/Gemfile index 2baf57ac4..589c22ad6 100644 --- a/instrumentation/trilogy/Gemfile +++ b/instrumentation/trilogy/Gemfile @@ -10,4 +10,6 @@ gemspec group :test do gem 'opentelemetry-instrumentation-base', path: '../base' + gem 'opentelemetry-helpers-mysql', path: '../../helpers/mysql' + gem 'opentelemetry-helpers-sql-obfuscation', path: '../../helpers/sql-obfuscation' end diff --git a/instrumentation/trilogy/lib/opentelemetry/instrumentation/trilogy/patches/client.rb 
b/instrumentation/trilogy/lib/opentelemetry/instrumentation/trilogy/patches/client.rb index 4a74c4480..30d7cd98b 100644 --- a/instrumentation/trilogy/lib/opentelemetry/instrumentation/trilogy/patches/client.rb +++ b/instrumentation/trilogy/lib/opentelemetry/instrumentation/trilogy/patches/client.rb @@ -1,53 +1,15 @@ # frozen_string_literal: true +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + module OpenTelemetry module Instrumentation module Trilogy module Patches # Module to prepend to Trilogy for instrumentation - module Client # rubocop:disable Metrics/ModuleLength - QUERY_NAMES = [ - 'set names', - 'select', - 'insert', - 'update', - 'delete', - 'begin', - 'commit', - 'rollback', - 'savepoint', - 'release savepoint', - 'explain', - 'drop database', - 'drop table', - 'create database', - 'create table' - ].freeze - - QUERY_NAME_RE = Regexp.new("^(#{QUERY_NAMES.join('|')})", Regexp::IGNORECASE) - - COMPONENTS_REGEX_MAP = { - single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/, - double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/, - numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/, - boolean_literals: /\b(?:true|false|null)\b/i, - hexadecimal_literals: /0x[0-9a-fA-F]+/, - comments: /(?:#|--).*?(?=\r|\n|$)/i, - multi_line_comments: %r{\/\*(?:[^\/]|\/[^*])*?(?:\*\/|\/\*.*)} - }.freeze - - MYSQL_COMPONENTS = %i[ - single_quotes - double_quotes - numeric_literals - boolean_literals - hexadecimal_literals - comments - multi_line_comments - ].freeze - - FULL_SQL_REGEXP = Regexp.union(MYSQL_COMPONENTS.map { |component| COMPONENTS_REGEX_MAP[component] }) - + module Client def initialize(options = {}) @connection_options = options # This is normally done by Trilogy#initialize @@ -72,8 +34,17 @@ def ping(...) 
def query(sql) tracer.in_span( - database_span_name(sql), - attributes: client_attributes(sql).merge!(OpenTelemetry::Instrumentation::Trilogy.attributes), + OpenTelemetry::Helpers::MySQL.database_span_name( + sql, + OpenTelemetry::Instrumentation::Trilogy.attributes[ + OpenTelemetry::SemanticConventions::Trace::DB_OPERATION + ], + database_name, + config + ), + attributes: client_attributes(sql).merge!( + OpenTelemetry::Instrumentation::Trilogy.attributes + ), kind: :client ) do super(sql) @@ -96,7 +67,8 @@ def client_attributes(sql = nil) if sql case config[:db_statement] when :obfuscate - attributes[::OpenTelemetry::SemanticConventions::Trace::DB_STATEMENT] = obfuscate_sql(sql) + attributes[::OpenTelemetry::SemanticConventions::Trace::DB_STATEMENT] = + OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: config[:obfuscation_limit], adapter: :mysql) when :include attributes[::OpenTelemetry::SemanticConventions::Trace::DB_STATEMENT] = sql end @@ -105,53 +77,6 @@ def client_attributes(sql = nil) attributes end - def obfuscate_sql(sql) - if sql.size > config[:obfuscation_limit] - first_match_index = sql.index(FULL_SQL_REGEXP) - truncation_message = "SQL truncated (> #{config[:obfuscation_limit]} characters)" - return truncation_message unless first_match_index - - truncated_sql = sql[..first_match_index - 1] - "#{truncated_sql}...\n#{truncation_message}" - else - obfuscated = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) - obfuscated = obfuscated.gsub(FULL_SQL_REGEXP, '?') - obfuscated = 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if detect_unmatched_pairs(obfuscated) - obfuscated - end - rescue StandardError => e - OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e) - 'OpenTelemetry error: failed to obfuscate sql' - end - - def detect_unmatched_pairs(obfuscated) - # We use this to check whether the query contains any quote characters - # after obfuscation. 
If so, that's a good indication that the original - # query was malformed, and so our obfuscation can't reliably find - # literals. In such a case, we'll replace the entire query with a - # placeholder. - %r{'|"|\/\*|\*\/}.match(obfuscated) - end - - def database_span_name(sql) - case config[:span_name] - when :statement_type - extract_statement_type(sql) - when :db_name - database_name - when :db_operation_and_name - op = OpenTelemetry::Instrumentation::Trilogy.attributes['db.operation'] - name = database_name - if op && name - "#{op} #{name}" - elsif op - op - elsif name - name - end - end || 'mysql' - end - def database_name connection_options[:database] end @@ -167,13 +92,6 @@ def tracer def config Trilogy::Instrumentation.instance.config end - - def extract_statement_type(sql) - QUERY_NAME_RE.match(sql) { |match| match[1].downcase } unless sql.nil? - rescue StandardError => e - OpenTelemetry.logger.error("Error extracting sql statement type: #{e.message}") - nil - end end end end diff --git a/instrumentation/trilogy/opentelemetry-instrumentation-trilogy.gemspec b/instrumentation/trilogy/opentelemetry-instrumentation-trilogy.gemspec index a0d9bd316..870ae61f9 100644 --- a/instrumentation/trilogy/opentelemetry-instrumentation-trilogy.gemspec +++ b/instrumentation/trilogy/opentelemetry-instrumentation-trilogy.gemspec @@ -26,6 +26,8 @@ Gem::Specification.new do |spec| spec.required_ruby_version = '>= 3.0' spec.add_dependency 'opentelemetry-api', '~> 1.0' + spec.add_dependency 'opentelemetry-helpers-mysql', '~> 0.1.0' + spec.add_dependency 'opentelemetry-helpers-sql-obfuscation', '~> 0.1.0' spec.add_dependency 'opentelemetry-instrumentation-base', '~> 0.22.1' spec.add_dependency 'opentelemetry-semantic_conventions', '>= 1.8.0' diff --git a/instrumentation/trilogy/test/opentelemetry/instrumentation/trilogy/instrumentation_test.rb b/instrumentation/trilogy/test/opentelemetry/instrumentation/trilogy/instrumentation_test.rb index ffc1a58ec..c4a551dff 100644 --- 
a/instrumentation/trilogy/test/opentelemetry/instrumentation/trilogy/instrumentation_test.rb +++ b/instrumentation/trilogy/test/opentelemetry/instrumentation/trilogy/instrumentation_test.rb @@ -316,7 +316,7 @@ client.query(sql) end.must_raise Trilogy::Error - _(span.name).must_equal 'mysql' + _(span.name).must_equal 'select' _(span.attributes[OpenTelemetry::SemanticConventions::Trace::DB_STATEMENT]).must_equal obfuscated_sql end