Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable crashtracking without profiler #3826

Merged
merged 17 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions .gitlab/install_datadog_deps.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,21 @@

puts gem_version_mapping

gem_version_mapping.each do |gem, version|
env = {}
env = {
'GEM_HOME' => versioned_path.to_s,
# Install `datadog` gem locally without its profiling native extension
'DD_PROFILING_NO_EXTENSION' => 'true',
}

[
'debase-ruby_core_source',
'ffi',
'libddwaf',
'msgpack',
'libdatadog', # libdatadog MUST be installed before datadog to ensure libdatadog native extension is compiled
'datadog',
].each do |gem|
version = gem_version_mapping.delete(gem)

gem_install_cmd = "gem install #{gem} "\
"--version #{version} "\
Expand All @@ -73,19 +86,13 @@

case gem
when 'ffi'
gem_install_cmd << "--install-dir #{versioned_path} "
# Install `ffi` gem with its built-in `libffi` native extension instead of using system's `libffi`
gem_install_cmd << '-- --disable-system-libffi '
when 'datadog'
# Install `datadog` gem locally without its profiling native extension
env['DD_PROFILING_NO_EXTENSION'] = 'true'
gem_install_cmd =
"gem install --local #{ENV.fetch('DATADOG_GEM_LOCATION')} "\
'--no-document '\
'--ignore-dependencies '\
"--install-dir #{versioned_path} "
else
gem_install_cmd << "--install-dir #{versioned_path} "
end

puts "Execute: #{gem_install_cmd}"
Expand All @@ -99,6 +106,14 @@
end
end

raise "#{gem_version_mapping.keys.join(',')} are not installed." if gem_version_mapping.any?

datadog_gem_path = versioned_path.join("gems/datadog-#{ENV.fetch('RUBY_PACKAGE_VERSION')}")
libdatadog_so_file = "libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}.so"
unless File.exist?("#{datadog_gem_path}/lib/#{libdatadog_so_file}")
raise "Missing #{libdatadog_so_file} in #{datadog_gem_path}."
end
Comment on lines +111 to +115
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also get this check by setting the DD_FAIL_INSTALL_IF_MISSING_EXTENSION env variable to "true" when calling gem install :)


FileUtils.cd(versioned_path.join("extensions/#{Gem::Platform.local}"), verbose: true) do
# Symlink those directories to be utilized by Ruby compiled with shared libraries
FileUtils.ln_sf Gem.extension_api_version, ruby_api_version
Expand Down
3 changes: 3 additions & 0 deletions Matrixfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
'' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
'core-old' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby'
},
'crashtracking' => {
'' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ❌ jruby',
},
'appsec:main' => {
'' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby'
},
Expand Down
13 changes: 11 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ namespace :spec do
task all: [:main, :benchmark,
:rails, :railsredis, :railsredis_activesupport, :railsactivejob,
:elasticsearch, :http, :redis, :sidekiq, :sinatra, :hanami, :hanami_autoinstrument,
:profiling]
:profiling, :crashtracking]

desc '' # "Explicitly hiding from `rake -T`"
RSpec::Core::RakeTask.new(:main) do |t, args|
TonyCTHsu marked this conversation as resolved.
Show resolved Hide resolved
t.pattern = 'spec/**/*_spec.rb'
t.exclude_pattern = 'spec/**/{contrib,benchmark,redis,auto_instrument,opentelemetry,profiling}/**/*_spec.rb,'\
t.exclude_pattern = 'spec/**/{contrib,benchmark,redis,auto_instrument,opentelemetry,profiling,crashtracking}/**/*_spec.rb,'\
' spec/**/{auto_instrument,opentelemetry}_spec.rb, spec/datadog/gem_packaging_spec.rb'
t.rspec_opts = args.to_a.join(' ')
end
Expand Down Expand Up @@ -170,6 +170,15 @@ namespace :spec do
t.rspec_opts = args.to_a.join(' ')
end

# rubocop:disable Style/MultilineBlockChain
RSpec::Core::RakeTask.new(:crashtracking) do |t, args|
t.pattern = 'spec/datadog/core/crashtracking/**/*_spec.rb'
t.rspec_opts = args.to_a.join(' ')
end.tap do |t|
Rake::Task[t.name].enhance(["compile:libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}"])
end
# rubocop:enable Style/MultilineBlockChain
TonyCTHsu marked this conversation as resolved.
Show resolved Hide resolved

desc '' # "Explicitly hiding from `rake -T`"
RSpec::Core::RakeTask.new(:contrib) do |t, args|
contrib_paths = [
Expand Down
11 changes: 6 additions & 5 deletions ext/libdatadog_api/crashtracker.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@

static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self);
static void crashtracker_init(VALUE profiling_module);
static void crashtracker_init(VALUE crashtracking_module);

// Used to report Ruby VM crashes.
// Once initialized, segfaults will be reported automatically using libdatadog.

void DDTRACE_EXPORT Init_libdatadog_api(void) {
VALUE datadog_module = rb_define_module("Datadog");
VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling");
VALUE core_module = rb_define_module_under(datadog_module, "Core");
VALUE crashtracking_module = rb_define_module_under(core_module, "Crashtracking");

crashtracker_init(profiling_module);
crashtracker_init(crashtracking_module);
}

void crashtracker_init(VALUE profiling_module) {
VALUE crashtracker_class = rb_define_class_under(profiling_module, "Crashtracker", rb_cObject);
void crashtracker_init(VALUE crashtracking_module) {
VALUE crashtracker_class = rb_define_class_under(crashtracking_module, "Component", rb_cObject);

rb_define_singleton_method(crashtracker_class, "_native_start_or_update_on_fork", _native_start_or_update_on_fork, -1);
rb_define_singleton_method(crashtracker_class, "_native_stop", _native_stop, 0);
Expand Down
16 changes: 15 additions & 1 deletion lib/datadog/core/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
require_relative '../../tracing/component'
require_relative '../../profiling/component'
require_relative '../../appsec/component'
require_relative '../crashtracking/component'

module Datadog
module Core
Expand Down Expand Up @@ -58,6 +59,17 @@ def build_runtime_metrics_worker(settings)
def build_telemetry(settings, agent_settings, logger)
Telemetry::Component.build(settings, agent_settings, logger)
end

def build_crashtracker(settings, agent_settings, logger:)
return unless settings.crashtracking.enabled

if (libdatadog_api_failure = Datadog::Core::Crashtracking::Component::LIBDATADOG_API_FAILURE)
logger.debug("Cannot enable crashtracking: #{libdatadog_api_failure}")
return
end

Datadog::Core::Crashtracking::Component.build(settings, agent_settings, logger: logger)
ivoanjo marked this conversation as resolved.
Show resolved Hide resolved
end
end

include Datadog::Tracing::Component::InstanceMethods
Expand All @@ -70,6 +82,7 @@ def build_telemetry(settings, agent_settings, logger)
:runtime_metrics,
:telemetry,
:tracer,
:crashtracker,
:appsec

def initialize(settings)
Expand All @@ -83,11 +96,12 @@ def initialize(settings)

@remote = Remote::Component.build(settings, agent_settings)
@tracer = self.class.build_tracer(settings, agent_settings, logger: @logger)
@crashtracker = self.class.build_crashtracker(settings, agent_settings, logger: @logger)

@profiler, profiler_logger_extra = Datadog::Profiling::Component.build_profiler_component(
settings: settings,
agent_settings: agent_settings,
optional_tracer: @tracer,
optional_tracer: @tracer
)
@environment_logger_extra.merge!(profiler_logger_extra) if profiler_logger_extra

Expand Down
28 changes: 18 additions & 10 deletions lib/datadog/core/configuration/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -451,17 +451,16 @@ def initialize(*_)
o.default 60
end

# Enables reporting of information when the Ruby VM crashes.
#
# This feature is no longer experimental, and we plan to deprecate this setting and replace it with a
# properly-named one soon.
#
# @default `DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED` environment variable as a boolean,
# otherwise `true`
# DEV-3.0: Remove `experimental_crash_tracking_enabled` option
option :experimental_crash_tracking_enabled do |o|
o.type :bool
o.env 'DD_PROFILING_EXPERIMENTAL_CRASH_TRACKING_ENABLED'
o.default true
o.after_set do |_, _, precedence|
unless precedence == Datadog::Core::Configuration::Option::Precedence::DEFAULT
Core.log_deprecation(key: :experimental_crash_tracking_enabled) do
'The profiling.advanced.experimental_crash_tracking_enabled setting has been deprecated for removal '\
'and no longer does anything. Please remove it from your Datadog.configure block.'
end
end
end
end
end

Expand Down Expand Up @@ -833,6 +832,15 @@ def initialize(*_)
option :service
end

settings :crashtracking do
# Enables reporting of information when Ruby VM crashes.
option :enabled do |o|
o.type :bool
o.default true
o.env 'DD_CRASHTRACKING_ENABLED'
ivoanjo marked this conversation as resolved.
Show resolved Hide resolved
end
end

# TODO: Tracing should manage its own settings.
# Keep this extension here for now to keep things working.
extend Datadog::Tracing::Configuration::Settings
Expand Down
21 changes: 21 additions & 0 deletions lib/datadog/core/crashtracking/agent_base_url.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true

require_relative '../configuration/ext'

module Datadog
  module Core
    module Crashtracking
      # Resolves the base URL of the agent from the agent settings.
      module AgentBaseUrl
        # Builds the agent base URL for the configured adapter.
        #
        # @param agent_settings object responding to `adapter`, and to `ssl`, `hostname` and `port`
        #   (HTTP adapter) or `uds_path` (unix socket adapter)
        # @return [String, nil] the base URL, or `nil` when the adapter is neither the HTTP
        #   nor the unix socket adapter
        def self.resolve(agent_settings)
          case agent_settings.adapter
          when Datadog::Core::Configuration::Ext::Agent::HTTP::ADAPTER
            scheme = agent_settings.ssl ? 'https' : 'http'
            "#{scheme}://#{bracket_ipv6(agent_settings.hostname)}:#{agent_settings.port}/"
          when Datadog::Core::Configuration::Ext::Agent::UnixSocket::ADAPTER
            "unix://#{agent_settings.uds_path}"
          end
        end

        # Wraps an IPv6 literal in square brackets so that the `:port` suffix stays unambiguous
        # (`http://[::1]:8126/` rather than `http://::1:8126/`).
        #
        # A colon can never be part of a DNS name or an IPv4 address, so its presence is enough
        # to identify an IPv6 literal. Hosts that are already bracketed are returned untouched.
        def self.bracket_ipv6(hostname)
          host = hostname.to_s
          return host unless host.include?(':') && !host.start_with?('[')

          "[#{host}]"
        end
        private_class_method :bracket_ipv6
      end
    end
  end
end
111 changes: 111 additions & 0 deletions lib/datadog/core/crashtracking/component.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# frozen_string_literal: true

require 'libdatadog'

require_relative 'tag_builder'
require_relative 'agent_base_url'
require_relative '../utils/only_once'
require_relative '../utils/at_fork_monkey_patch'

module Datadog
module Core
module Crashtracking
# Used to report Ruby VM crashes.
#
# NOTE: The crashtracker native state is a singleton;
# so even if you create multiple instances of `Crashtracking::Component` and start them,
# it only works as "last writer wins". Same for stop -- there's only one state, so calling stop
# on it will stop the crash tracker, regardless of which instance started it.
#
# Methods prefixed with _native_ are implemented in `crashtracker.c`
class Component
# Result of loading the `libdatadog_api` native extension when this class body is evaluated:
# `nil` when the `require` succeeded, otherwise the message of the `LoadError` it raised.
# The required file name is built from the leading part of `RUBY_VERSION` matched by the
# regexp below and from `RUBY_PLATFORM`.
# NOTE(review): the `.` in `/\d+.\d+/` is unescaped, so it matches any character --
# presumably `\.` was intended; confirm before changing, since the same expression is used
# to name the compiled extension elsewhere.
LIBDATADOG_API_FAILURE =
begin
require "libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}"
nil
rescue LoadError => e
e.message
end

ONLY_ONCE = Core::Utils::OnlyOnce.new

# Builds and starts a crash tracking component.
#
# Each prerequisite (agent base URL, libdatadog library path, crash tracking receiver binary)
# is checked in turn and a warning is logged for every one that is missing, so that a single
# run reports all problems at once.
#
# @param settings passed to `TagBuilder.call` to build the crash report tags
# @param agent_settings passed to `AgentBaseUrl.resolve`
# @param logger receives a `warn` message for each missing prerequisite
# @return [Component, nil] the started component, or `nil` when any prerequisite is missing
def self.build(settings, agent_settings, logger:)
  tags = TagBuilder.call(settings)

  agent_base_url = AgentBaseUrl.resolve(agent_settings)
  unless agent_base_url
    logger.warn('Missing agent base URL; cannot enable crash tracking')
  end

  ld_library_path = ::Libdatadog.ld_library_path
  unless ld_library_path
    logger.warn('Missing ld_library_path; cannot enable crash tracking')
  end

  receiver_path = ::Libdatadog.path_to_crashtracking_receiver_binary
  unless receiver_path
    logger.warn('Missing path_to_crashtracking_receiver_binary; cannot enable crash tracking')
  end

  return unless agent_base_url && ld_library_path && receiver_path

  component = new(
    tags: tags,
    agent_base_url: agent_base_url,
    ld_library_path: ld_library_path,
    path_to_crashtracking_receiver_binary: receiver_path,
    logger: logger
  )
  component.start
  component
end

# Stores the crash tracker configuration; nothing is started here.
#
# @param tags tags attached to crash reports (converted with `to_a` when handed to the native code)
# @param agent_base_url base URL of the agent that receives crash reports
# @param ld_library_path library path handed to the native crash tracker
# @param path_to_crashtracking_receiver_binary path of the crash tracking receiver binary
# @param logger logger used to report start/stop outcomes
def initialize(tags:, agent_base_url:, ld_library_path:, path_to_crashtracking_receiver_binary:, logger:)
  @path_to_crashtracking_receiver_binary = path_to_crashtracking_receiver_binary
  @ld_library_path = ld_library_path
  @agent_base_url = agent_base_url
  @tags = tags
  @logger = logger
end

# Starts crash tracking with this instance's configuration and makes sure it is refreshed
# in forked child processes.
#
# The at-fork monkey patch is applied first, then the native crash tracker is started.
# The child at-fork hook is registered through `ONLY_ONCE`, so it is installed a single time
# no matter how many instances are started.
def start
Utils::AtForkMonkeyPatch.apply!

start_or_update_on_fork(action: :start)
ONLY_ONCE.run do
Utils::AtForkMonkeyPatch.at_fork(:child) do
# Must NOT reference `self` here, as only the first instance will
# be captured by the ONLY_ONCE and we want to pick the latest active one
# (which may have different tags or agent config)
Datadog.send(:components).crashtracker&.update_on_fork
end
end
end

# Re-sends this instance's configuration to the native crash tracker using the
# `:update_on_fork` action; invoked from the child at-fork hook registered in `start`.
def update_on_fork
start_or_update_on_fork(action: :update_on_fork)
end

# Stops the native crash tracker.
#
# Never raises for `StandardError`s: success is logged at debug level, and any failure
# raised by the native call is logged at error level instead of being propagated.
def stop
  self.class._native_stop
  logger.debug('Crash tracking stopped successfully')
rescue StandardError => error
  logger.error("Failed to stop crash tracking: #{error.message}")
end

private

attr_reader :tags, :agent_base_url, :ld_library_path, :path_to_crashtracking_receiver_binary, :logger

def start_or_update_on_fork(action:)
self.class._native_start_or_update_on_fork(
action: action,
exporter_configuration: [:agent, agent_base_url],
path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
ld_library_path: ld_library_path,
tags_as_array: tags.to_a,
upload_timeout_seconds: 1
ivoanjo marked this conversation as resolved.
Show resolved Hide resolved
)
logger.debug("Crash tracking #{action} successfully")
rescue => e
logger.error("Failed to #{action} crash tracking: #{e.message}")
end
end
end
end
end
37 changes: 37 additions & 0 deletions lib/datadog/core/crashtracking/tag_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true

require_relative '../utils'
require_relative '../environment/socket'
require_relative '../environment/identity'
require_relative '../environment/git'

module Datadog
module Core
module Crashtracking
# This module builds a hash of tags
module TagBuilder
def self.call(settings)
hash = {
'host' => Environment::Socket.hostname,
'process_id' => Process.pid.to_s,
'runtime_engine' => Environment::Identity.lang_engine,
'runtime-id' => Environment::Identity.id,
'runtime_platform' => Environment::Identity.lang_platform,
'runtime_version' => Environment::Identity.lang_version,
'env' => settings.env,
'service' => settings.service,
'version' => settings.version,
'git.repository_url' => Environment::Git.git_repository_url,
'git.commit.sha' => Environment::Git.git_commit_sha,
'is_crash' => true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch! I was talking with @sanchda about this yesterday and he mentioned libdatadog wasn't adding these automatically.

In Python we also add severity:crash (see DataDog/dd-trace-py#10199 ). I was planning on doing a PR to add this one as well, but in case you want to go ahead ;)

}.compact

# Make sure everything is an utf-8 string, to avoid encoding issues in downstream
settings.tags.merge(hash).each_with_object({}) do |(key, value), h|
h[Utils.utf8_encode(key)] = Utils.utf8_encode(value)
end
end
end
end
end
end
1 change: 0 additions & 1 deletion lib/datadog/profiling.rb
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def self.allocation_count # rubocop:disable Lint/NestedMethodDefinition (On purp
require_relative 'profiling/collectors/idle_sampling_helper'
require_relative 'profiling/collectors/stack'
require_relative 'profiling/collectors/thread_context'
require_relative 'profiling/crashtracker'
require_relative 'profiling/stack_recorder'
require_relative 'profiling/exporter'
require_relative 'profiling/flush'
Expand Down
Loading
Loading