diff --git a/lib/datadog/di/code_tracker.rb b/lib/datadog/di/code_tracker.rb index 5ace92a17d2..010f04c7368 100644 --- a/lib/datadog/di/code_tracker.rb +++ b/lib/datadog/di/code_tracker.rb @@ -91,7 +91,7 @@ def start # before release. if component = DI.current_component # steep:ignore raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions - component.logger.warn("Unhandled exception in script_compiled trace point: #{exc.class}: #{exc}") + component.log_warn_internal("Unhandled exception in script_compiled trace point: #{exc.class}: #{exc}") component.telemetry&.report(exc, description: "Unhandled exception in script_compiled trace point") # TODO test this path else diff --git a/lib/datadog/di/component.rb b/lib/datadog/di/component.rb index c109bd4082e..715f60e39dd 100644 --- a/lib/datadog/di/component.rb +++ b/lib/datadog/di/component.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative 'logging' + module Datadog module DI # Component for dynamic instrumentation. @@ -13,12 +15,14 @@ module DI # intalled tracepoints and so on. Component will clean up all # resources and installed tracepoints upon shutdown. class Component + include Logging + class << self def build(settings, agent_settings, telemetry: nil) return unless settings.respond_to?(:dynamic_instrumentation) && settings.dynamic_instrumentation.enabled unless settings.respond_to?(:remote) && settings.remote.enabled - Datadog.logger.debug("Dynamic Instrumentation could not be enabled because Remote Configuration Management is not available. To enable Remote Configuration, see https://docs.datadoghq.com/agent/remote_config") + Datadog.logger.warn("Dynamic Instrumentation could not be enabled because Remote Configuration Management is not available. To enable Remote Configuration, see https://docs.datadoghq.com/agent/remote_config") return end @@ -53,12 +57,12 @@ def environment_supported?(settings) # TODO add tests? unless settings.dynamic_instrumentation.internal.development if Datadog::Core::Environment::Execution.development? - Datadog.logger.debug("Not enabling dynamic instrumentation because we are in development environment") + Datadog.logger.warn("Not enabling dynamic instrumentation because we are in development environment") return false end end if RUBY_ENGINE != 'ruby' || RUBY_VERSION < '2.6' - Datadog.logger.debug("Not enabling dynamic instrumentation because of unsupported Ruby version") + Datadog.logger.warn("Not enabling dynamic instrumentation because of unsupported Ruby version") return false end true diff --git a/lib/datadog/di/configuration/settings.rb b/lib/datadog/di/configuration/settings.rb index 55413613494..1a12e518f45 100644 --- a/lib/datadog/di/configuration/settings.rb +++ b/lib/datadog/di/configuration/settings.rb @@ -157,6 +157,15 @@ def self.add_settings!(base) o.default false end + # Increases logging verbosity level from 'debug' to 'warn' + # for internal DI diagnostics, i.e. log messages that + # should not be written to customers' logs in production but + # that are useful when developing DI. + option :verbose_logging do |o| + o.type :bool + o.default false + end + # Minimum interval, in seconds, between probe status and # snapshot submissions to the agent. Probe notifier worker will # batch together payloads submitted during each interval. diff --git a/lib/datadog/di/instrumenter.rb b/lib/datadog/di/instrumenter.rb index b1f8f0f599a..77f382fde6e 100644 --- a/lib/datadog/di/instrumenter.rb +++ b/lib/datadog/di/instrumenter.rb @@ -3,6 +3,7 @@ # rubocop:disable Lint/AssignmentInCondition require 'benchmark' +require_relative 'logging' module Datadog module DI @@ -54,6 +55,8 @@ module DI # # @api private class Instrumenter + include Logging + def initialize(settings, serializer, logger, code_tracker: nil, telemetry: nil) @settings = settings @serializer = serializer @@ -305,13 +308,13 @@ def hook_line(probe, &block) end rescue => exc raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}") + log_warn_internal("Unhandled exception in line trace point: #{exc.class}: #{exc}") telemetry&.report(exc, description: "Unhandled exception in line trace point") # TODO test this path end rescue => exc raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}") + log_warn_internal("Unhandled exception in line trace point: #{exc.class}: #{exc}") telemetry&.report(exc, description: "Unhandled exception in line trace point") # TODO test this path end @@ -357,7 +360,7 @@ def hook(probe, &block) hook_line(probe, &block) else # TODO add test coverage for this path - logger.warn("Unknown probe type to hook: #{probe}") + log_warn_internal("Unknown probe type to hook: #{probe}") end end @@ -368,7 +371,7 @@ def unhook(probe) unhook_line(probe) else # TODO add test coverage for this path - logger.warn("Unknown probe type to unhook: #{probe}") + log_warn_internal("Unknown probe type to unhook: #{probe}") end end diff --git a/lib/datadog/di/logging.rb b/lib/datadog/di/logging.rb new file mode 100644 index 00000000000..24a26f53fae --- /dev/null +++ b/lib/datadog/di/logging.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Datadog + module DI + # Logging for DI + module Logging + # Logs an internal warning message. + # When verbose logging is enabled, the logging happens at warn level. + # When verbose logging is disabled, the logging happens at debug level + # (which is how the rest of the library is reporting its internal + # warnings/errors). + def log_warn_internal(msg) + if settings.dynamic_instrumentation.internal.verbose_logging + logger.warn(msg) + else + logger.debug(msg) + end + end + + # Logs an internal informational message. + # When verbose logging is enabled, the logging happens at info level. + # When verbose logging is disabled, nothing is logged. + def log_info_internal(msg) + if settings.dynamic_instrumentation.internal.verbose_logging + logger.info(msg) + end + end + end + end +end diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index 9dd74d85b5e..97ed41e5540 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -3,6 +3,7 @@ # rubocop:disable Lint/AssignmentInCondition require 'monitor' +require_relative 'logging' module Datadog module DI @@ -15,6 +16,8 @@ module DI # # @api private class ProbeManager + include Logging + def initialize(settings, instrumenter, probe_notification_builder, probe_notifier_worker, logger, telemetry: nil) @settings = settings @@ -32,7 +35,7 @@ def initialize(settings, instrumenter, probe_notification_builder, install_pending_method_probes(tp.self) rescue => exc raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Unhandled exception in definition trace point: #{exc.class}: #{exc}") + log_warn_internal("Unhandled exception in definition trace point: #{exc.class}: #{exc}") telemetry&.report(exc, description: "Unhandled exception in definition trace point") # TODO test this path end @@ -120,7 +123,7 @@ def add_probe(probe) # In "propagate all exceptions" mode we will try to instrument again. raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Error processing probe configuration: #{exc.class}: #{exc}") + log_warn_internal("Error processing probe configuration: #{exc.class}: #{exc}") telemetry&.report(exc, description: "Error processing probe configuration") # TODO report probe as failed to agent since we won't attempt to # install it again. @@ -160,7 +163,7 @@ def remove_other_probes(probe_ids) raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions # Silence all exceptions? # TODO should we propagate here and rescue upstream? - logger.warn("Error removing probe #{probe.id}: #{exc.class}: #{exc}") + log_warn_internal("Error removing probe #{probe.id}: #{exc.class}: #{exc}") telemetry&.report(exc, description: "Error removing probe") end end @@ -190,7 +193,7 @@ def remove_other_probes(probe_ids) rescue => exc raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Error installing probe after class is defined: #{exc.class}: #{exc}") + log_warn_internal("Error installing probe after class is defined: #{exc.class}: #{exc}") telemetry&.report(exc, description: "Error installing probe after class is defined") end end diff --git a/lib/datadog/di/probe_notifier_worker.rb b/lib/datadog/di/probe_notifier_worker.rb index 6ce2ccdf340..c999afce666 100644 --- a/lib/datadog/di/probe_notifier_worker.rb +++ b/lib/datadog/di/probe_notifier_worker.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative '../core/semaphore' +require_relative 'logging' module Datadog module DI @@ -23,6 +24,8 @@ module DI # # @api private class ProbeNotifierWorker + include Logging + def initialize(settings, transport, logger, telemetry: nil) @settings = settings @telemetry = telemetry @@ -77,7 +80,7 @@ def start rescue => exc raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Error in probe notifier worker: #{exc.class}: #{exc} (at #{exc.backtrace.first})") + log_warn_internal("Error in probe notifier worker: #{exc.class}: #{exc} (at #{exc.backtrace.first})") telemetry&.report(exc, description: "Error in probe notifier worker") end @lock.synchronize do @@ -185,7 +188,7 @@ def io_in_progress? queue = send("#{event_type}_queue") # TODO determine a suitable limit via testing/benchmarking if queue.length > 100 - logger.warn("#{self.class.name}: dropping #{event_type} because queue is full") + log_warn_internal("#{self.class.name}: dropping #{event_type} because queue is full") else queue << event end @@ -242,7 +245,7 @@ def set_sleep_remaining end rescue => exc raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("failed to send #{event_name}: #{exc.class}: #{exc} (at #{exc.backtrace.first})") + log_warn_internal("failed to send #{event_name}: #{exc.class}: #{exc} (at #{exc.backtrace.first})") # Should we report this error to telemetry? Most likely failure # to send is due to a network issue, and trying to send a # telemetry message would also fail. @@ -253,7 +256,7 @@ def set_sleep_remaining # Normally the queue should only be consumed in this method, # however if anyone consumes it elsewhere we don't want to block # while consuming it here. Rescue ThreadError and return. - logger.warn("Unexpected #{event_name} queue underflow - consumed elsewhere?") + log_warn_internal("Unexpected #{event_name} queue underflow - consumed elsewhere?") telemetry&.report(exc, description: "Unexpected #{event_name} queue underflow") ensure @lock.synchronize do diff --git a/lib/datadog/di/remote.rb b/lib/datadog/di/remote.rb index e0a5f3b90ed..eb76abe30e7 100644 --- a/lib/datadog/di/remote.rb +++ b/lib/datadog/di/remote.rb @@ -51,7 +51,7 @@ def receivers(telemetry) probe = ProbeBuilder.build_from_remote_config(probe_spec) payload = component.probe_notification_builder.build_received(probe) component.probe_notifier_worker.add_status(payload) - component.logger.info("Received probe from RC: #{probe.type} #{probe.location}") + component.log_info_internal("Received probe from RC: #{probe.type} #{probe.location}") begin # TODO test exception capture @@ -60,7 +60,7 @@ def receivers(telemetry) rescue => exc raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions - component.logger.warn("Unhandled exception adding probe in DI remote receiver: #{exc.class}: #{exc}") + component.log_warn_internal("Unhandled exception adding probe in DI remote receiver: #{exc.class}: #{exc}") component.telemetry&.report(exc, description: "Unhandled exception adding probe in DI remote receiver") # If a probe fails to install, we will mark the content @@ -81,7 +81,7 @@ def receivers(telemetry) rescue => exc raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions - component.logger.warn("Unhandled exception handling probe in DI remote receiver: #{exc.class}: #{exc}") + component.log_warn_internal("Unhandled exception handling probe in DI remote receiver: #{exc.class}: #{exc}") component.telemetry&.report(exc, description: "Unhandled exception handling probe in DI remote receiver") content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}: #{Array(exc.backtrace).join("\n")}") @@ -95,7 +95,7 @@ def receivers(telemetry) rescue => exc raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions - component.logger.warn("Unhandled exception removing probes in DI remote receiver: #{exc.class}: #{exc}") + component.log_warn_internal("Unhandled exception removing probes in DI remote receiver: #{exc.class}: #{exc}") component.telemetry&.report(exc, description: "Unhandled exception removing probes in DI remote receiver") end end diff --git a/spec/datadog/di/instrumenter_spec.rb b/spec/datadog/di/instrumenter_spec.rb index 695d8567386..276876dec66 100644 --- a/spec/datadog/di/instrumenter_spec.rb +++ b/spec/datadog/di/instrumenter_spec.rb @@ -11,6 +11,7 @@ mock_settings_for_di do |settings| allow(settings.dynamic_instrumentation).to receive(:enabled).and_return(true) allow(settings.dynamic_instrumentation.internal).to receive(:untargeted_trace_points).and_return(false) + allow(settings.dynamic_instrumentation.internal).to receive(:verbose_logging).and_return(true) allow(settings.dynamic_instrumentation).to receive(:max_capture_depth).and_return(2) allow(settings.dynamic_instrumentation).to receive(:max_capture_attribute_count).and_return(2) allow(settings.dynamic_instrumentation).to receive(:max_capture_string_length).and_return(100) diff --git a/spec/datadog/di/probe_manager_spec.rb b/spec/datadog/di/probe_manager_spec.rb index f32cb71f577..446a5ef9df2 100644 --- a/spec/datadog/di/probe_manager_spec.rb +++ b/spec/datadog/di/probe_manager_spec.rb @@ -9,6 +9,7 @@ class ProbeManagerSpecTestClass; end mock_settings_for_di do |settings| allow(settings.dynamic_instrumentation).to receive(:enabled).and_return(true) allow(settings.dynamic_instrumentation.internal).to receive(:propagate_all_exceptions).and_return(false) + allow(settings.dynamic_instrumentation.internal).to receive(:verbose_logging).and_return(true) end let(:instrumenter) do diff --git a/spec/datadog/di/remote_spec.rb b/spec/datadog/di/remote_spec.rb index f3a7d386b9e..4a6056d4989 100644 --- a/spec/datadog/di/remote_spec.rb +++ b/spec/datadog/di/remote_spec.rb @@ -92,6 +92,7 @@ mock_settings_for_di do |settings| allow(settings.dynamic_instrumentation).to receive(:enabled).and_return(true) allow(settings.dynamic_instrumentation.internal).to receive(:propagate_all_exceptions).and_return(false) + allow(settings.dynamic_instrumentation.internal).to receive(:verbose_logging).and_return(true) end let(:serializer) do @@ -144,8 +145,7 @@ end it 'calls probe manager to add a probe' do - expect(component).to receive(:logger).and_return(logger) - expect(logger).to receive(:info) do |message| + expect(component).to receive(:log_info_internal) do |message| expect(message).to match(/Received probe/) end @@ -162,15 +162,13 @@ context 'probe addition raises an exception' do it 'logs warning and consumes the exception' do expect(component).to receive(:telemetry).and_return(telemetry) - expect(component).to receive(:logger).and_return(logger) - expect(logger).to receive(:info) do |message| + expect(component).to receive(:log_info_internal) do |message| expect(message).to match(/Received probe/) end - expect(logger).to receive(:warn) do |msg| + expect(component).to receive(:log_warn_internal) do |msg| expect(msg).to match(/Unhandled exception.*Runtime error from test/) end - expect(component).to receive(:logger).and_return(logger) expect(telemetry).to receive(:report) expect(probe_manager).to receive(:add_probe).and_raise("Runtime error from test") @@ -186,17 +184,15 @@ it 'calls probe manager to remove stale probes' do allow(component).to receive(:telemetry) - expect(component).to receive(:logger).and_return(logger) - expect(logger).to receive(:info) do |message| + expect(component).to receive(:log_info_internal) do |message| expect(message).to match(/Received probe/) end - expect(logger).to receive(:warn) do |msg| + expect(component).to receive(:log_warn_internal) do |msg| expect(msg).to match(/Unhandled exception.*Runtime error from test/) end allow(probe_manager).to receive(:add_probe).and_raise("Runtime error from test") - expect(component).to receive(:logger).and_return(logger) expect(component).to receive(:probe_notification_builder).and_return(probe_notification_builder) expect(probe_notification_builder).to receive(:build_received) expect(component).to receive(:probe_notifier_worker).and_return(probe_notifier_worker) @@ -209,27 +205,24 @@ context 'probe removal raises an exception' do it 'logs warning and consumes the exception' do expect(component).to receive(:telemetry).and_return(telemetry).at_least(:once) - expect(component).to receive(:logger).and_return(logger) - expect(logger).to receive(:info) do |message| + expect(component).to receive(:log_info_internal) do |message| expect(message).to match(/Received probe/) end - expect(logger).to receive(:warn) do |msg| + expect(component).to receive(:log_warn_internal) do |msg| expect(msg).to match(/Unhandled exception.*Runtime error 1 from test/) end expect(telemetry).to receive(:report) allow(probe_manager).to receive(:add_probe).and_raise("Runtime error 1 from test") - expect(component).to receive(:logger).and_return(logger) expect(component).to receive(:probe_notification_builder).and_return(probe_notification_builder) expect(probe_notification_builder).to receive(:build_received) expect(component).to receive(:probe_notifier_worker).and_return(probe_notifier_worker) expect(probe_notifier_worker).to receive(:add_status) - expect(logger).to receive(:warn) do |msg| + expect(component).to receive(:log_warn_internal) do |msg| expect(msg).to match(/Unhandled exception.*Runtime error 2 from test/) end - expect(component).to receive(:logger).and_return(logger) expect(telemetry).to receive(:report) expect(probe_manager).to receive(:remove_other_probes).with(['11']).and_raise("Runtime error 2 from test")