From b77473f0a7be8b0c44b2c71c7e9575b43bc16c5e Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Sat, 4 May 2024 12:58:42 +1000 Subject: [PATCH] write/macho: ensure Mach-O subsections are not zero size (#676) For Mach-O, `add_symbol_data` now ensures that the symbol size is at least 1 when subsections via symbols are enabled. This change was made to support linking with ld-prime. It is also unclear how this previously managed to work with ld64. `write::Object::add_subsection` no longer enables subsections via symbols for Mach-O. Use `set_subsections_via_symbols` instead. This change was made because Mach-O subsections are all or nothing, so this decision must be made before any symbols are added. `write::Object::add_subsection` no longer adds data to the subsection. This change was made because it was done with `append_section_data`, but this is often not the correct way to add data to the subsection. Usually `add_symbol_data` is a better choice. --- crates/examples/src/bin/simple_write.rs | 4 +- src/write/macho.rs | 15 ++---- src/write/mod.rs | 64 +++++++++++++++---------- tests/round_trip/comdat.rs | 16 +++---- tests/round_trip/elf.rs | 4 +- 5 files changed, 54 insertions(+), 49 deletions(-) diff --git a/crates/examples/src/bin/simple_write.rs b/crates/examples/src/bin/simple_write.rs index 76c1c183..3a350463 100644 --- a/crates/examples/src/bin/simple_write.rs +++ b/crates/examples/src/bin/simple_write.rs @@ -66,8 +66,8 @@ fn main() -> Result<(), Box> { main_data.extend_from_slice(&[0xc3]); // Add the main function in its own subsection (equivalent to -ffunction-sections). - let (main_section, main_offset) = - obj.add_subsection(StandardSection::Text, b"main", &main_data, 1); + let main_section = obj.add_subsection(StandardSection::Text, b"main"); + let main_offset = obj.append_section_data(main_section, &main_data, 1); // Add a globally visible symbol for the main function. obj.add_symbol(Symbol { name: b"main".into(), diff --git a/src/write/macho.rs b/src/write/macho.rs index d5260ef2..33ff5d8f 100644 --- a/src/write/macho.rs +++ b/src/write/macho.rs @@ -66,16 +66,6 @@ impl<'a> Object<'a> { // Private methods. impl<'a> Object<'a> { - pub(crate) fn macho_set_subsections_via_symbols(&mut self) { - let flags = match self.flags { - FileFlags::MachO { flags } => flags, - _ => 0, - }; - self.flags = FileFlags::MachO { - flags: flags | macho::MH_SUBSECTIONS_VIA_SYMBOLS, - }; - } - pub(crate) fn macho_segment_name(&self, segment: StandardSegment) -> &'static [u8] { match segment { StandardSegment::Text => &b"__TEXT"[..], @@ -570,10 +560,13 @@ impl<'a> Object<'a> { cpusubtype = cpu_subtype; } - let flags = match self.flags { + let mut flags = match self.flags { FileFlags::MachO { flags } => flags, _ => 0, }; + if self.macho_subsections_via_symbols { + flags |= macho::MH_SUBSECTIONS_VIA_SYMBOLS; + } macho.write_mach_header( buffer, MachHeader { diff --git a/src/write/mod.rs b/src/write/mod.rs index 198ef5e3..c16ed2e5 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -90,6 +90,9 @@ pub struct Object<'a> { macho_cpu_subtype: Option, #[cfg(feature = "macho")] macho_build_version: Option, + /// Mach-O MH_SUBSECTIONS_VIA_SYMBOLS flag. Only ever set if format is Mach-O. + #[cfg(feature = "macho")] + macho_subsections_via_symbols: bool, } impl<'a> Object<'a> { @@ -115,6 +118,8 @@ impl<'a> Object<'a> { macho_cpu_subtype: None, #[cfg(feature = "macho")] macho_build_version: None, + #[cfg(feature = "macho")] + macho_subsections_via_symbols: false, } } @@ -273,41 +278,34 @@ impl<'a> Object<'a> { /// Add a subsection. Returns the `SectionId` and section offset of the data. /// - /// Must not be called for sections that contain uninitialized data. - /// `align` must be a power of two. - pub fn add_subsection( - &mut self, - section: StandardSection, - name: &[u8], - data: &[u8], - align: u64, - ) -> (SectionId, u64) { - let section_id = if self.has_subsections_via_symbols() { - self.set_subsections_via_symbols(); + /// For Mach-O, this does not create a subsection, and instead uses the + /// section from [`Self::section_id`]. Use [`Self::set_subsections_via_symbols`] + /// to enable subsections via symbols. + pub fn add_subsection(&mut self, section: StandardSection, name: &[u8]) -> SectionId { + if self.has_subsections_via_symbols() { self.section_id(section) } else { let (segment, name, kind, flags) = self.subsection_info(section, name); let id = self.add_section(segment.to_vec(), name, kind); self.section_mut(id).flags = flags; id - }; - let offset = self.append_section_data(section_id, data, align); - (section_id, offset) + } } fn has_subsections_via_symbols(&self) -> bool { - match self.format { - BinaryFormat::Coff | BinaryFormat::Elf | BinaryFormat::Xcoff => false, - BinaryFormat::MachO => true, - _ => unimplemented!(), - } + self.format == BinaryFormat::MachO } - fn set_subsections_via_symbols(&mut self) { - match self.format { - #[cfg(feature = "macho")] - BinaryFormat::MachO => self.macho_set_subsections_via_symbols(), - _ => unimplemented!(), + /// Enable subsections via symbols if supported. + /// + /// This should be called before adding any subsections or symbols. + /// + /// For Mach-O, this sets the `MH_SUBSECTIONS_VIA_SYMBOLS` flag. + /// For other formats, this does nothing. + pub fn set_subsections_via_symbols(&mut self) { + #[cfg(feature = "macho")] + if self.format == BinaryFormat::MachO { + self.macho_subsections_via_symbols = true; } } @@ -480,6 +478,9 @@ impl<'a> Object<'a> { /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the /// symbol will indirectly point to the added data via the `__thread_vars` entry. /// + /// For Mach-O, if [`Self::set_subsections_via_symbols`] is enabled, this will + /// automatically ensure the data size is at least 1. + /// /// Returns the section offset of the data. /// /// Must not be called for sections that contain uninitialized data. @@ -488,9 +489,13 @@ impl<'a> Object<'a> { &mut self, symbol_id: SymbolId, section: SectionId, - data: &[u8], + mut data: &[u8], align: u64, ) -> u64 { + #[cfg(feature = "macho")] + if data.is_empty() && self.macho_subsections_via_symbols { + data = &[0]; + } let offset = self.append_section_data(section, data, align); self.set_symbol_data(symbol_id, section, offset, data.len() as u64); offset @@ -501,6 +506,9 @@ impl<'a> Object<'a> { /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the /// symbol will indirectly point to the added data via the `__thread_vars` entry. /// + /// For Mach-O, if [`Self::set_subsections_via_symbols`] is enabled, this will + /// automatically ensure the data size is at least 1. + /// /// Returns the section offset of the data. /// /// Must not be called for sections that contain initialized data. @@ -509,9 +517,13 @@ impl<'a> Object<'a> { &mut self, symbol_id: SymbolId, section: SectionId, - size: u64, + mut size: u64, align: u64, ) -> u64 { + #[cfg(feature = "macho")] + if size == 0 && self.macho_subsections_via_symbols { + size = 1; + } let offset = self.append_section_bss(section, size, align); self.set_symbol_data(symbol_id, section, offset, size); offset diff --git a/tests/round_trip/comdat.rs b/tests/round_trip/comdat.rs index d7005555..baeca821 100644 --- a/tests/round_trip/comdat.rs +++ b/tests/round_trip/comdat.rs @@ -13,11 +13,11 @@ fn coff_x86_64_comdat() { let mut object = write::Object::new(BinaryFormat::Coff, Architecture::X86_64, Endianness::Little); - let (section1, offset) = - object.add_subsection(write::StandardSection::Text, b"s1", &[0, 1, 2, 3], 4); + let section1 = object.add_subsection(write::StandardSection::Text, b"s1"); + let offset = object.append_section_data(section1, &[0, 1, 2, 3], 4); object.section_symbol(section1); - let (section2, _) = - object.add_subsection(write::StandardSection::Data, b"s1", &[0, 1, 2, 3], 4); + let section2 = object.add_subsection(write::StandardSection::Data, b"s1"); + object.append_section_data(section2, &[0, 1, 2, 3], 4); object.section_symbol(section2); let symbol = object.add_symbol(write::Symbol { @@ -132,10 +132,10 @@ fn elf_x86_64_comdat() { let mut object = write::Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little); - let (section1, offset) = - object.add_subsection(write::StandardSection::Text, b"s1", &[0, 1, 2, 3], 4); - let (section2, _) = - object.add_subsection(write::StandardSection::Data, b"s1", &[0, 1, 2, 3], 4); + let section1 = object.add_subsection(write::StandardSection::Text, b"s1"); + let offset = object.append_section_data(section1, &[0, 1, 2, 3], 4); + let section2 = object.add_subsection(write::StandardSection::Data, b"s1"); + object.append_section_data(section2, &[0, 1, 2, 3], 4); let symbol = object.add_symbol(write::Symbol { name: b"s1".to_vec(), diff --git a/tests/round_trip/elf.rs b/tests/round_trip/elf.rs index 60cac7c2..157015b2 100644 --- a/tests/round_trip/elf.rs +++ b/tests/round_trip/elf.rs @@ -13,8 +13,8 @@ fn symtab_shndx() { for i in 0..0x10000 { let name = format!("func{}", i).into_bytes(); - let (section, offset) = - object.add_subsection(write::StandardSection::Text, &name, &[0xcc], 1); + let section = object.add_subsection(write::StandardSection::Text, &name); + let offset = object.append_section_data(section, &[0xcc], 1); object.add_symbol(write::Symbol { name, value: offset,