From 3a34582cbc4c1e129e0f2b032ded596050fa2818 Mon Sep 17 00:00:00 2001 From: geemili Date: Wed, 20 Nov 2024 10:07:55 -0700 Subject: [PATCH] teach std.debug to convert addresses to ELF symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading `std.debug.Dwarf` info fails, `std.debug.SelfInfo` will now try to load function names from the ELF `.symtab` section. Written under contract from TigerBeetle. © 2024 TigerBeetle --- lib/std/debug.zig | 3 +- lib/std/debug/ElfSymTab.zig | 254 ++++++++++++++++++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 132 +++++++++++++------ 3 files changed, 350 insertions(+), 39 deletions(-) create mode 100644 lib/std/debug/ElfSymTab.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 0756e456ff59..ab5e4bf6bbd6 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -16,6 +16,7 @@ const native_endian = native_arch.endian(); pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); pub const FixedBufferReader = @import("debug/FixedBufferReader.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); +pub const ElfSymTab = @import("debug/ElfSymTab.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); @@ -945,7 +946,7 @@ fn printUnwindError(debug_info: *SelfInfo, out_stream: anytype, address: usize, pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), + error.MissingDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, }; diff --git a/lib/std/debug/ElfSymTab.zig b/lib/std/debug/ElfSymTab.zig new file mode 100644 index 000000000000..c25babb34229 --- /dev/null +++ 
b/lib/std/debug/ElfSymTab.zig @@ -0,0 +1,254 @@ +//! Similar to std.debug.Dwarf, but only using symbol info from an ELF file. + +const ElfSymTab = @This(); + +endian: std.builtin.Endian, + +base_address: usize, +mapped_memory: []align(std.mem.page_size) const u8, +sections: SectionArray, + +/// Populated by `scanAllSymbols`. +symbol_list: std.ArrayListUnmanaged(Symbol) = .empty, + +pub const Symbol = struct { + name: []const u8, + start: u64, + end: u64, +}; + +pub const OpenError = ScanError; + +/// Populates `symbol_list` by scanning the symbol table. The caller has the +/// responsibility to initialize the other fields before calling; the `symtab` +/// and `strtab` entries of `sections` must be non-null. +pub fn open(d: *ElfSymTab, gpa: Allocator) OpenError!void { + try d.scanAllSymbols(gpa); +} + +pub const ScanError = error{ + InvalidDebugInfo, + MissingDebugInfo, +} || Allocator.Error || std.debug.FixedBufferReader.Error; + +fn scanAllSymbols(ei: *ElfSymTab, allocator: Allocator) OpenError!void { + const symtab: Section = ei.sections[@intFromEnum(Section.Id.symtab)].?; + const strtab: Section = ei.sections[@intFromEnum(Section.Id.strtab)].?; + + const num_symbols = symtab.data.len / @sizeOf(elf.Sym); // stride of the cast below; `entry_size` may be 0 or bogus + const symbols = @as([*]const elf.Sym, @ptrCast(@alignCast(symtab.data.ptr)))[0..num_symbols]; + for (symbols) |symbol| { + if (symbol.st_name == 0) continue; + if (symbol.st_shndx == elf.SHN_UNDEF) continue; + + const symbol_name = getStringFromTable(strtab.data, symbol.st_name) orelse { + // If it doesn't have a symbol name, we can't really use it for debugging purposes + continue; + }; + + // TODO: Does SHN_ABS make a difference for this use case? 
+ // if (symbol.st_shndx == elf.SHN_ABS) { + // continue; + // } + + // TODO: handle relocatable symbols in DYN type binaries + try ei.symbol_list.append(allocator, .{ + .name = symbol_name, + .start = symbol.st_value, + .end = symbol.st_value +| symbol.st_size, // saturate: a corrupt entry must not trap on overflow + }); + } +} + +pub const LoadError = error{ + InvalidDebugInfo, + MissingDebugInfo, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfEndian, + /// TODO: implement this and then remove this error code + UnimplementedElfForeignEndian, + /// TODO: implement this and then remove this error code + UnimplementedElfType, + /// The debug info may be valid but this implementation uses memory + /// mapping which limits things to usize. If the target debug info is + /// 64-bit and host is 32-bit, there may be debug info that is not + /// supportable using this method. + Overflow, + + PermissionDenied, + LockedMemoryLimitExceeded, + MemoryMappingNotSupported, +} || Allocator.Error || std.fs.File.OpenError || OpenError; + +/// Reads symbol info from an already mapped ELF file. `mapped_mem` must outlive the result. 
+pub fn load( + gpa: Allocator, + mapped_mem: []align(std.mem.page_size) const u8, + expected_crc: ?u32, +) LoadError!ElfSymTab { + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + if (endian != native_endian) return error.UnimplementedElfForeignEndian; + if (hdr.e_type != .EXEC) return error.UnimplementedElfType; // every other ELF type is rejected + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; // section-name string table, located via e_shstrndx + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: ElfSymTab.SectionArray = ElfSymTab.null_section_array; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + var section_index: ?usize = null; + inline for (@typeInfo(ElfSymTab.Section.Id).@"enum".fields, 0..) |sect, i| { + if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; // keep the first section found for each id + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + const section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_reader); + + const decompressed_section = try gpa.alloc(u8, chdr.ch_size); + errdefer gpa.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .entry_size = shdr.sh_entsize, + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .entry_size = shdr.sh_entsize, + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(ElfSymTab.Section.Id.strtab)] == null or + sections[@intFromEnum(ElfSymTab.Section.Id.symtab)] == null; + + if (missing_debug_info) { + return error.MissingDebugInfo; + } + + var ei: ElfSymTab = .{ + .base_address = 0, + .endian = endian, + .sections = sections, + .mapped_memory = mapped_mem, + }; + errdefer ei.deinit(gpa); // release owned sections and the partial symbol list if the scan fails + try ElfSymTab.open(&ei, gpa); + + return ei; +} + +pub fn deinit(self: *ElfSymTab, allocator: std.mem.Allocator) void { + for (self.sections) |section_opt| { + const s = section_opt orelse continue; + if (s.owned) allocator.free(s.data); // unowned data points into `mapped_memory` + } + self.symbol_list.deinit(allocator); +} + +const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); +pub const SectionArray = [num_sections]?Section; +pub const null_section_array = [_]?Section{null} ** num_sections; + +pub const Section = struct { + entry_size: usize, + data: []const u8, + // Module-relative virtual address. + // Only set if the section data was loaded from disk. + virtual_address: ?usize = null, + // If `data` was allocated for this ElfSymTab and must be freed by `deinit`. 
+ owned: bool, + + pub const Id = enum { + strtab, + symtab, + }; + + // For sections that are not memory mapped by the loader, this is an offset + // from `data.ptr` to where the section would have been mapped. Otherwise, + // `data` is directly backed by the section and the offset is zero. + pub fn virtualOffset(self: Section, base_address: usize) i64 { + return if (self.virtual_address) |va| + @as(i64, @intCast(base_address + va)) - + @as(i64, @intCast(@intFromPtr(self.data.ptr))) + else + 0; + } +}; + +pub fn section(ei: ElfSymTab, elf_section: Section.Id) ?[]const u8 { + return if (ei.sections[@intFromEnum(elf_section)]) |s| s.data else null; +} + +pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { + _ = allocator; + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + for (self.symbol_list.items) |symbol| { + if (symbol.start <= relocated_address and relocated_address < symbol.end) { // `end` is exclusive (`st_value + st_size`) + return .{ + .name = symbol.name, + }; + } + } + return .{}; +} + +fn getStringFromTable(string_table: []const u8, pos: usize) ?[]const u8 { + if (pos == 0) return null; + const section_name_end = std.mem.indexOfScalarPos(u8, string_table, pos, '\x00') orelse return null; + return string_table[pos..section_name_end]; +} + +pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = cast(usize, offset) orelse return error.Overflow; + const end = try std.math.add(usize, start, cast(usize, size) orelse return error.Overflow); + return if (end <= ptr.len) ptr[start..end] else error.Overflow; // reject ranges that run past the mapping +} + +const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); + +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const elf = std.elf; +const mem = std.mem; +const assert = std.debug.assert; +const cast = std.math.cast; +const maxInt = std.math.maxInt; +const MemoryAccessor = std.debug.MemoryAccessor; +const FixedBufferReader = 
std.debug.FixedBufferReader; diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 544cf0ac6ff4..81e489dfc437 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -17,6 +17,7 @@ const pdb = std.pdb; const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; +const ElfSymTab = std.debug.ElfSymTab; const Dwarf = std.debug.Dwarf; const Pdb = std.debug.Pdb; const File = std.fs.File; @@ -462,8 +463,8 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { return obj_di; } - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); + const obj_ei = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_ei); var sections: Dwarf.SectionArray = Dwarf.null_section_array; if (ctx.gnu_eh_frame) |eh_frame_hdr| { @@ -477,15 +478,23 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { }; } - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null); - obj_di.base_address = ctx.base_address; + obj_ei.* = try Elf.readDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null); - // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding - obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + switch (obj_ei.*) { + .dwarf => |*dwarf_info| { + dwarf_info.base_address = ctx.base_address; - try self.address_map.putNoClobber(ctx.base_address, obj_di); + // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding + dwarf_info.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + }, + .symtab => |*symtab| { + symtab.base_address = ctx.base_address; + }, + } + + try self.address_map.putNoClobber(ctx.base_address, obj_ei); - return obj_di; + return obj_ei; } fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { @@ 
-794,7 +803,7 @@ pub const Module = switch (native_os) { }; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Elf, .wasi, .emscripten => struct { pub fn deinit(self: *@This(), allocator: Allocator) void { _ = self; @@ -1036,38 +1045,85 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { } } -/// Reads debug info from an ELF file, or the current binary if none in specified. -/// If the required sections aren't present but a reference to external debug info is, -/// then this this function will recurse to attempt to load the debug sections from -/// an external file. -pub fn readElfDebugInfo( - allocator: Allocator, - elf_filename: ?[]const u8, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, - parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !Dwarf.ElfModule { - nosuspend { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, +pub const Elf = union(enum) { + dwarf: Dwarf.ElfModule, + symtab: ElfSymTab, + + /// Reads debug info from an ELF file, or the current binary if none is specified. + /// If the required sections aren't present but a reference to external debug info is, + /// then this function will recurse to attempt to load the debug sections from + /// an external file. If DWARF loading fails, the ELF symbol table is used instead. 
+ pub fn readDebugInfo( + allocator: Allocator, + elf_filename: ?[]const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(mem.page_size) const u8, + ) !Elf { + nosuspend { + const elf_file = (if (elf_filename) |filename| blk: { + break :blk fs.cwd().openFile(filename, .{}); + } else fs.openSelfExe(.{})) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + + const mapped_mem = try mapWholeFile(elf_file); + + load_dwarf: { + const dwarf_info = Dwarf.ElfModule.load( + allocator, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_filename, + ) catch { + break :load_dwarf; + }; + return Elf{ .dwarf = dwarf_info }; + } + + load_symtab: { + const symtab = ElfSymTab.load( + allocator, + mapped_mem, + expected_crc, + ) catch { + break :load_symtab; + }; + return Elf{ .symtab = symtab }; + } + + return error.MissingDebugInfo; + } + } + + pub fn deinit(this: *@This(), allocator: Allocator) void { + return switch (this.*) { + .dwarf => |*dwarf_info| dwarf_info.deinit(allocator), + .symtab => |*symtab| symtab.deinit(allocator), + }; + } + + pub fn getDwarfInfoForAddress(this: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return switch (this.*) { + .dwarf => |*dwarf_info| &dwarf_info.dwarf, // capture by pointer so the result points into `this`, not a copy + .symtab => null, }; + } - const mapped_mem = try mapWholeFile(elf_file); - return Dwarf.ElfModule.load( - allocator, - mapped_mem, - build_id, - expected_crc, - parent_sections, - parent_mapped_mem, - elf_filename, - ); + pub fn getSymbolAtAddress(this: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { + return switch (this.*) { + .dwarf => |*dwarf_info| dwarf_info.getSymbolAtAddress(allocator, address), + .symtab => |*symtab| symtab.getSymbolAtAddress(allocator, address), + }; } -} +}; const MachoSymbol = struct { strx: u32,