diff --git a/lib/ddtrace/profiling/collectors/code_provenance.rb b/lib/ddtrace/profiling/collectors/code_provenance.rb index 1dbd3e1b0e3..78c19da74d6 100644 --- a/lib/ddtrace/profiling/collectors/code_provenance.rb +++ b/lib/ddtrace/profiling/collectors/code_provenance.rb @@ -58,12 +58,32 @@ def record_library(library) libraries_by_path[library.path] = library end + # Ruby hashes are guaranteed to keep the insertion order of their keys. Here, we rebuild @libraries_by_path so + # that iterating the hash visits the paths in reverse (descending) string order. + # + # Why we do this: if some library's path is a prefix of another library's path, e.g. '/home/foo' and + # '/home/foo/bar', we want a loaded file to be matched against the longest (most specific) path first. + # When paths are sorted as strings in reverse order, '/home/foo/bar' comes before '/home/foo'. + # + # This way, when we iterate the @libraries_by_path hash, we know the first hit will also be the longest. + # + # Alternatively/in the future we could instead use a trie to match paths, but I doubt that, for the data sizes + # we're looking at, a trie would be much faster than using Ruby's built-in native collections. 
+ def sort_libraries_by_longest_path_first + @libraries_by_path = @libraries_by_path.sort.reverse!.to_h + end + def record_loaded_specs(loaded_specs) + recorded_library = false + loaded_specs.each do |spec| next if libraries_by_name.key?(spec.name) record_library(Library.new(type: 'library', name: spec.name, version: spec.version, path: spec.gem_dir)) + recorded_library = true end + + sort_libraries_by_longest_path_first if recorded_library end def record_loaded_files(loaded_files) diff --git a/spec/ddtrace/profiling/collectors/code_provenance_spec.rb b/spec/ddtrace/profiling/collectors/code_provenance_spec.rb index e5c8c495652..73a164c73a6 100644 --- a/spec/ddtrace/profiling/collectors/code_provenance_spec.rb +++ b/spec/ddtrace/profiling/collectors/code_provenance_spec.rb @@ -74,6 +74,34 @@ it 'returns self' do expect(code_provenance.refresh).to be code_provenance end + + context "when a gem's path is inside another gem's path" do + # I'm not entirely sure if this can happen in end-user apps, but it can happen in CI if bundler is configured to + # install dependencies into a subfolder of ddtrace. In particular, GitHub Actions does this. + + it 'matches the loaded file to the longest matching path' do + code_provenance.refresh( + loaded_files: ['/dd-trace-rb/vendor/bundle/ruby/2.7.0/gems/byebug-11.1.3/lib/byebug.rb'], + loaded_specs: [ + instance_double( + Gem::Specification, + name: 'ddtrace', + version: '1.2.3', + gem_dir: '/dd-trace-rb' + ), + instance_double( + Gem::Specification, + name: 'byebug', + version: '4.5.6', + gem_dir: '/dd-trace-rb/vendor/bundle/ruby/2.7.0/gems/byebug-11.1.3' + ) + ], + ) + + expect(code_provenance.generate).to have(1).item + expect(code_provenance.generate.first).to have_attributes(name: 'byebug') + end + end end describe '#generate_json' do