Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-11045] Fix profiling warnings being really hard to silence #4232

Merged
merged 10 commits into from
Dec 17, 2024
3 changes: 2 additions & 1 deletion lib/datadog/core/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ def initialize(settings)
@profiler, profiler_logger_extra = Datadog::Profiling::Component.build_profiler_component(
settings: settings,
agent_settings: agent_settings,
optional_tracer: @tracer
optional_tracer: @tracer,
logger: @logger,
)
@environment_logger_extra.merge!(profiler_logger_extra) if profiler_logger_extra

Expand Down
108 changes: 55 additions & 53 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ module Datadog
module Profiling
# Responsible for wiring up the Profiler for execution
module Component
ALLOCATION_WITH_RACTORS_ONLY_ONCE = Datadog::Core::Utils::OnlyOnce.new
private_constant :ALLOCATION_WITH_RACTORS_ONLY_ONCE

# Passing in a `nil` tracer is supported and will disable the following profiling features:
# * Code Hotspots panel in the trace viewer, as well as scoping a profile down to a span
# * Profiling in the trace viewer, as well as scoping a profile down to a span
# * Endpoint aggregation in the profiler UX, including normalization (resource per endpoint call)
def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) # rubocop:disable Metrics/MethodLength
def self.build_profiler_component(settings:, agent_settings:, optional_tracer:, logger:) # rubocop:disable Metrics/MethodLength
return [nil, {profiling_enabled: false}] unless settings.profiling.enabled

# Workaround for weird dependency direction: the Core::Configuration::Components class currently has a
Expand Down Expand Up @@ -36,14 +39,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)

# NOTE: Please update the Initialization section of ProfilingDevelopment.md with any changes to this method

no_signals_workaround_enabled = no_signals_workaround_enabled?(settings)
no_signals_workaround_enabled = no_signals_workaround_enabled?(settings, logger)
timeline_enabled = settings.profiling.advanced.timeline_enabled
allocation_profiling_enabled = enable_allocation_profiling?(settings)
allocation_profiling_enabled = enable_allocation_profiling?(settings, logger)
heap_sample_every = get_heap_sample_every(settings)
heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_every)
heap_size_profiling_enabled = enable_heap_size_profiling?(settings, heap_profiling_enabled)
heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_every, logger)
heap_size_profiling_enabled = enable_heap_size_profiling?(settings, heap_profiling_enabled, logger)

overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage)
overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage, logger)
upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max

recorder = Datadog::Profiling::StackRecorder.new(
Expand All @@ -57,13 +60,13 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
)
thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled)
worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new(
gc_profiling_enabled: enable_gc_profiling?(settings),
gc_profiling_enabled: enable_gc_profiling?(settings, logger),
no_signals_workaround_enabled: no_signals_workaround_enabled,
thread_context_collector: thread_context_collector,
dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
gvl_profiling_enabled: enable_gvl_profiling?(settings),
gvl_profiling_enabled: enable_gvl_profiling?(settings, logger),
)

internal_metadata = {
Expand Down Expand Up @@ -120,7 +123,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
)
end

private_class_method def self.enable_gc_profiling?(settings)
private_class_method def self.enable_gc_profiling?(settings, logger)
return false unless settings.profiling.advanced.gc_enabled

# SEVERE - Only with Ractors
Expand All @@ -131,14 +134,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
if RUBY_VERSION.start_with?("3.0.") ||
(RUBY_VERSION.start_with?("3.1.") && RUBY_VERSION < "3.1.4") ||
(RUBY_VERSION.start_with?("3.2.") && RUBY_VERSION < "3.2.3")
Datadog.logger.warn(
logger.warn(
"Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling GC profiling would cause " \
"crashes (https://bugs.ruby-lang.org/issues/18464). GC profiling has been disabled."
)
return false
elsif RUBY_VERSION.start_with?("3.")
Datadog.logger.debug(
"In all known versions of Ruby 3.x, using Ractors may result in GC profiling unexpectedly " \
logger.debug(
"Using Ractors may result in GC profiling unexpectedly " \
"stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your " \
"application stability or performance. This does not happen if Ractors are not used."
)
Expand All @@ -155,7 +158,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
heap_sample_rate
end

private_class_method def self.enable_allocation_profiling?(settings)
private_class_method def self.enable_allocation_profiling?(settings, logger)
return false unless settings.profiling.allocation_enabled

# Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x.
Expand All @@ -165,7 +168,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
# https://github.com/ruby/ruby/pull/7464) that makes this crash in any configuration. This bug is
# fixed on Ruby versions 3.2.3 and 3.3.0.
if RUBY_VERSION.start_with?("3.2.") && RUBY_VERSION < "3.2.3"
Datadog.logger.warn(
logger.warn(
"Allocation profiling is not supported in Ruby versions 3.2.0, 3.2.1 and 3.2.2 and will be forcibly " \
"disabled. This is due to a VM bug that can lead to crashes (https://bugs.ruby-lang.org/issues/19482). " \
"Other Ruby versions do not suffer from this issue."
Expand All @@ -181,7 +184,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
if RUBY_VERSION.start_with?("3.0.") ||
(RUBY_VERSION.start_with?("3.1.") && RUBY_VERSION < "3.1.4") ||
(RUBY_VERSION.start_with?("3.2.") && RUBY_VERSION < "3.2.3")
Datadog.logger.warn(
logger.warn(
"Current Ruby version (#{RUBY_VERSION}) has a VM bug where enabling allocation profiling while using " \
"Ractors may cause unexpected issues, including crashes (https://bugs.ruby-lang.org/issues/18464). " \
"This does not happen if Ractors are not used."
Expand All @@ -190,25 +193,27 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
# On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets
# garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on.
elsif RUBY_VERSION.start_with?("3.")
Datadog.logger.warn(
"In all known versions of Ruby 3.x, using Ractors may result in allocation profiling unexpectedly " \
"stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your " \
"application stability or performance. This does not happen if Ractors are not used."
)
ALLOCATION_WITH_RACTORS_ONLY_ONCE.run do
logger.info(
"Using Ractors may result in allocation profiling " \
"stopping (https://bugs.ruby-lang.org/issues/19112). Note that this stop has no impact in your " \
"application stability or performance. This does not happen if Ractors are not used."
)
end
end

Datadog.logger.debug("Enabled allocation profiling")
logger.debug("Enabled allocation profiling")

true
end

private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_rate)
private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled, heap_sample_rate, logger)
heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled

return false unless heap_profiling_enabled

if RUBY_VERSION < "3.1"
Datadog.logger.warn(
logger.warn(
"Current Ruby version (#{RUBY_VERSION}) cannot support heap profiling due to VM limitations. " \
"Please upgrade to Ruby >= 3.1 in order to use this feature. Heap profiling has been disabled."
)
Expand All @@ -219,33 +224,31 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
raise ArgumentError, "Heap profiling requires allocation profiling to be enabled"
end

Datadog.logger.warn(
logger.warn(
"Enabled experimental heap profiling: heap_sample_rate=#{heap_sample_rate}. This is experimental, not " \
"recommended, and will increase overhead!"
)

true
end

private_class_method def self.enable_heap_size_profiling?(settings, heap_profiling_enabled)
private_class_method def self.enable_heap_size_profiling?(settings, heap_profiling_enabled, logger)
heap_size_profiling_enabled = settings.profiling.advanced.experimental_heap_size_enabled

return false unless heap_profiling_enabled && heap_size_profiling_enabled

Datadog.logger.warn(
logger.warn(
"Enabled experimental heap size profiling. This is experimental, not recommended, and will increase overhead!"
)

true
end

private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength
private_class_method def self.no_signals_workaround_enabled?(settings, logger) # rubocop:disable Metrics/MethodLength
setting_value = settings.profiling.advanced.no_signals_workaround_enabled
legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?("2.5.")

unless [true, false, :auto].include?(setting_value)
# TODO: Replace with a warning instead.
Datadog.logger.error(
logger.warn(
"Ignoring invalid value for profiling no_signals_workaround_enabled setting: #{setting_value.inspect}. " \
"Valid options are `true`, `false` or (default) `:auto`."
)
Expand All @@ -254,23 +257,23 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
end

if setting_value == false
if legacy_ruby_that_should_use_workaround
Datadog.logger.warn(
'The profiling "no signals" workaround has been disabled via configuration on a legacy Ruby version ' \
"(< 2.6). This is not recommended " \
"in production environments, as due to limitations in Ruby APIs, we suspect it may lead to crashes " \
"in very rare situations. Please report any issues you run into to Datadog support or " \
if RUBY_VERSION.start_with?("2.5.")
logger.warn(
'The profiling "no signals" workaround has been disabled via configuration on Ruby 2.5. ' \
"This is not recommended " \
"in production environments, as due to limitations in Ruby APIs, we suspect it may lead to rare crashes. " \
"Please report any issues you run into to Datadog support or " \
"via <https://github.com/datadog/dd-trace-rb/issues/new>!"
)
else
Datadog.logger.warn('Profiling "no signals" workaround disabled via configuration')
logger.warn('Profiling "no signals" workaround disabled via configuration')
end

return false
end

if setting_value == true
Datadog.logger.warn(
logger.warn(
'Profiling "no signals" workaround enabled via configuration. Profiling data will have lower quality.'
)

Expand All @@ -280,10 +283,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
# Setting is in auto mode. Let's probe to see if we should enable it:

# We don't warn users in this situation because "upgrade your Ruby" is not a great warning
return true if legacy_ruby_that_should_use_workaround
return true if RUBY_VERSION.start_with?("2.5.")

if Gem.loaded_specs["mysql2"] && incompatible_libmysqlclient_version?(settings)
Datadog.logger.warn(
if Gem.loaded_specs["mysql2"] && incompatible_libmysqlclient_version?(settings, logger)
logger.warn(
'Enabling the profiling "no signals" workaround because an incompatible version of the mysql2 gem is ' \
"installed. Profiling data will have lower quality. " \
"To fix this, upgrade the libmysqlclient in your OS image to version 8.0.0 or above."
Expand All @@ -292,7 +295,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
end

if Gem.loaded_specs["rugged"]
Datadog.logger.warn(
logger.warn(
'Enabling the profiling "no signals" workaround because the rugged gem is installed. ' \
"This is needed because some operations on this gem are currently incompatible with the normal working mode " \
"of the profiler, as detailed in <https://github.com/datadog/dd-trace-rb/issues/2721>. " \
Expand All @@ -302,7 +305,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
end

if (defined?(::PhusionPassenger) || Gem.loaded_specs["passenger"]) && incompatible_passenger_version?
Datadog.logger.warn(
logger.warn(
'Enabling the profiling "no signals" workaround because an incompatible version of the passenger gem is ' \
"installed. Profiling data will have lower quality. " \
"To fix this, upgrade the passenger gem to version 6.0.19 or above."
Expand All @@ -322,10 +325,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
#
# The `mysql2` gem's `info` method can be used to determine which `libmysqlclient` version is in use, and thus to
# detect if it's safe for the profiler to use signals or if we need to employ a fallback.
private_class_method def self.incompatible_libmysqlclient_version?(settings)
private_class_method def self.incompatible_libmysqlclient_version?(settings, logger)
return true if settings.profiling.advanced.skip_mysql2_check

Datadog.logger.debug(
logger.debug(
"Requiring `mysql2` to check if the `libmysqlclient` version it uses is compatible with profiling"
)

Expand Down Expand Up @@ -354,14 +357,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
libmysqlclient_version >= Gem::Version.new("8.0.0") ||
looks_like_mariadb?(info, libmysqlclient_version)

Datadog.logger.debug(
logger.debug(
"The `mysql2` gem is using #{compatible ? "a compatible" : "an incompatible"} version of " \
"the `libmysqlclient` library (#{libmysqlclient_version})"
)

!compatible
rescue StandardError, LoadError => e
Datadog.logger.warn(
logger.warn(
"Failed to probe `mysql2` gem information. " \
"Cause: #{e.class.name} #{e.message} Location: #{Array(e.backtrace).first}"
)
Expand All @@ -383,12 +386,11 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
end
end

private_class_method def self.valid_overhead_target(overhead_target_percentage)
private_class_method def self.valid_overhead_target(overhead_target_percentage, logger)
if overhead_target_percentage > 0 && overhead_target_percentage <= 20
overhead_target_percentage
else
# TODO: Replace with a warning instead.
Datadog.logger.error(
logger.warn(
"Ignoring invalid value for profiling overhead_target_percentage setting: " \
"#{overhead_target_percentage.inspect}. Falling back to default value."
)
Expand Down Expand Up @@ -432,10 +434,10 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
settings.profiling.advanced.dir_interruption_workaround_enabled
end

private_class_method def self.enable_gvl_profiling?(settings)
private_class_method def self.enable_gvl_profiling?(settings, logger)
if RUBY_VERSION < "3.2"
if settings.profiling.advanced.preview_gvl_enabled
Datadog.logger.warn("GVL profiling is currently not supported in Ruby < 3.2 and will not be enabled.")
logger.warn("GVL profiling is currently not supported in Ruby < 3.2 and will not be enabled.")
end

return false
Expand Down
Loading
Loading