From 18c80c166f65afbaca3815fd163a9501f6f90826 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 14 Mar 2023 10:25:14 +0100 Subject: [PATCH] zig: 0.10 fix macOS build Relevant upstream issue: ziglang/zig#14559 The patch is a backport of fixes that landed in zig-master and can be removed with the zig-0.11 release. Additionally, make sure we link statically against LLVM to avoid unpleasant runtime surprises originating from mixing static and dynamic LLVM libraries. Finally, unbreak Zig 0.10.1 on macOS. --- pkgs/development/compilers/zig/0.10.nix | 13 +- .../development/compilers/zig/zig_14559.patch | 367 ++++++++++++++++++ pkgs/top-level/all-packages.nix | 3 +- 3 files changed, 377 insertions(+), 6 deletions(-) create mode 100644 pkgs/development/compilers/zig/zig_14559.patch diff --git a/pkgs/development/compilers/zig/0.10.nix b/pkgs/development/compilers/zig/0.10.nix index 860baf1d60451..a6e253adde665 100644 --- a/pkgs/development/compilers/zig/0.10.nix +++ b/pkgs/development/compilers/zig/0.10.nix @@ -35,6 +35,12 @@ stdenv.mkDerivation rec { llvm ]); + patches = [ + # Backport fixes for alignment-related panics from zig-master to 0.10. 
+ # Upstream issue: https://github.com/ziglang/zig/issues/14559 + ./zig_14559.patch + ]; + preBuild = '' export HOME=$TMPDIR; ''; @@ -49,6 +55,9 @@ stdenv.mkDerivation rec { # file RPATH_CHANGE could not write new RPATH "-DCMAKE_SKIP_BUILD_RPATH=ON" + # always link against static build of LLVM + "-DZIG_STATIC_LLVM=ON" + # ensure determinism in the compiler build "-DZIG_TARGET_MCPU=baseline" ]; @@ -75,9 +84,5 @@ stdenv.mkDerivation rec { license = licenses.mit; maintainers = with maintainers; [ aiotter andrewrk AndersonTorres ]; platforms = platforms.unix; - # Build fails on Darwin on both AArch64 and x86_64: - # https://github.com/NixOS/nixpkgs/pull/210324#issuecomment-1381313616 - # https://github.com/NixOS/nixpkgs/pull/210324#issuecomment-1381236045 - broken = stdenv.isDarwin; }; } diff --git a/pkgs/development/compilers/zig/zig_14559.patch b/pkgs/development/compilers/zig/zig_14559.patch new file mode 100644 index 0000000000000..95f0bdf701e29 --- /dev/null +++ b/pkgs/development/compilers/zig/zig_14559.patch @@ -0,0 +1,367 @@ +From 405801d8a8be734425eca4f3eebc56287804ac93 Mon Sep 17 00:00:00 2001 +From: Jakub Konka +Date: Sun, 5 Feb 2023 10:04:34 +0100 +Subject: [PATCH] macho: temp fix alignment and enable some logs + +--- + src/link/MachO/Object.zig | 80 ++++++++++++++++++++++++++------------ + src/link/MachO/ZldAtom.zig | 29 +++++++------- + src/link/MachO/zld.zig | 22 +++++------ + 3 files changed, 79 insertions(+), 52 deletions(-) + +diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig +index 401184da515..05638c1f858 100644 +--- a/src/link/MachO/Object.zig ++++ b/src/link/MachO/Object.zig +@@ -54,12 +54,18 @@ atom_by_index_table: []AtomIndex = undefined, + /// Can be undefined as set together with in_symtab. + globals_lookup: []i64 = undefined, + ++/// All relocs sorted and flattened. 
++relocs: std.ArrayListUnmanaged(macho.relocation_info) = .{}, ++sect_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, ++ + atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, + + pub fn deinit(self: *Object, gpa: Allocator) void { + self.atoms.deinit(gpa); + gpa.free(self.name); + gpa.free(self.contents); ++ self.relocs.deinit(gpa); ++ self.sect_relocs_lookup.deinit(gpa); + if (self.in_symtab) |_| { + gpa.free(self.source_symtab_lookup); + gpa.free(self.source_address_lookup); +@@ -101,6 +107,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) + return error.MismatchedCpuArchitecture; + } + ++ const nsects = self.getSourceSections().len; ++ try self.sect_relocs_lookup.resize(allocator, nsects); ++ mem.set(u32, self.sect_relocs_lookup.items, 0); ++ + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], +@@ -110,13 +120,11 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) + .SYMTAB => { + const symtab = cmd.cast(macho.symtab_command).?; + self.in_symtab = @ptrCast( +- [*]const macho.nlist_64, +- @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), ++ [*]align(1) const macho.nlist_64, ++ self.contents.ptr + symtab.symoff, + )[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + +- const nsects = self.getSourceSections().len; +- + self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); + self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); +@@ -192,6 +200,17 @@ const SymbolAtIndex = struct { + return mem.sliceTo(@ptrCast([*:0]const u8, ctx.in_strtab.?.ptr + off), 0); + } + ++ fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { ++ const sym = self.getSymbol(ctx); ++ if (!sym.ext()) { ++ const sym_name = self.getSymbolName(ctx); ++ 
if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 0; ++ return 1; ++ } ++ if (sym.weakDef() or sym.pext()) return 2; ++ return 3; ++ } ++ + /// Performs lexicographic-like check. + /// * lhs and rhs defined + /// * if lhs == rhs +@@ -206,23 +225,15 @@ const SymbolAtIndex = struct { + if (lhs.sect() and rhs.sect()) { + if (lhs.n_value == rhs.n_value) { + if (lhs.n_sect == rhs.n_sect) { +- if (lhs.ext() and rhs.ext()) { +- if ((lhs.pext() or lhs.weakDef()) and (rhs.pext() or rhs.weakDef())) { +- return false; +- } else return rhs.pext() or rhs.weakDef(); +- } else { +- const lhs_name = lhs_index.getSymbolName(ctx); +- const lhs_temp = mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); +- const rhs_name = rhs_index.getSymbolName(ctx); +- const rhs_temp = mem.startsWith(u8, rhs_name, "l") or mem.startsWith(u8, rhs_name, "L"); +- if (lhs_temp and rhs_temp) { +- return false; +- } else return rhs_temp; +- } ++ const lhs_senior = lhs_index.getSymbolSeniority(ctx); ++ const rhs_senior = rhs_index.getSymbolSeniority(ctx); ++ if (lhs_senior == rhs_senior) { ++ return lessThanByNStrx(ctx, lhs_index, rhs_index); ++ } else return lhs_senior < rhs_senior; + } else return lhs.n_sect < rhs.n_sect; + } else return lhs.n_value < rhs.n_value; + } else if (lhs.undf() and rhs.undf()) { +- return false; ++ return lessThanByNStrx(ctx, lhs_index, rhs_index); + } else return rhs.undf(); + } + +@@ -393,6 +404,16 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { + zld.sections.items(.header)[out_sect_id].sectName(), + }); + ++ // Parse all relocs for the input section, and sort in descending order. ++ // Previously, I have wrongly assumed the compilers output relocations for each ++ // section in a sorted manner which is simply not true. 
++ const start = @intCast(u32, self.relocs.items.len); ++ if (self.getSourceRelocs(section.header)) |relocs| { ++ try self.relocs.appendUnalignedSlice(gpa, relocs); ++ std.sort.sort(macho.relocation_info, self.relocs.items[start..], {}, relocGreaterThan); ++ } ++ self.sect_relocs_lookup.items[section.id] = start; ++ + const cpu_arch = zld.options.target.cpu.arch; + const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); + const sect_start_index = sect_sym_index + sect_loc.index; +@@ -559,7 +580,7 @@ pub fn getSourceSections(self: Object) []const macho.section_64 { + } else unreachable; + } + +-pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { ++pub fn parseDataInCode(self: Object) ?[]align(1) const macho.data_in_code_entry { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], +@@ -569,10 +590,7 @@ pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { + .DATA_IN_CODE => { + const dice = cmd.cast(macho.linkedit_data_command).?; + const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); +- return @ptrCast( +- [*]const macho.data_in_code_entry, +- @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), +- )[0..ndice]; ++ return @ptrCast([*]align(1) const macho.data_in_code_entry, self.contents.ptr + dice.dataoff)[0..ndice]; + }, + else => {}, + } +@@ -632,11 +650,23 @@ pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { + return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; + } + +-pub fn getRelocs(self: Object, sect: macho.section_64) []align(1) const macho.relocation_info { +- if (sect.nreloc == 0) return &[0]macho.relocation_info{}; ++fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info { ++ if (sect.nreloc == 0) return null; + return @ptrCast([*]align(1) const macho.relocation_info, 
self.contents.ptr + sect.reloff)[0..sect.nreloc]; + } + ++pub fn getRelocs(self: Object, sect_id: u16) []const macho.relocation_info { ++ const sect = self.getSourceSection(sect_id); ++ const start = self.sect_relocs_lookup.items[sect_id]; ++ const len = sect.nreloc; ++ return self.relocs.items[start..][0..len]; ++} ++ ++fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { ++ _ = ctx; ++ return lhs.r_address > rhs.r_address; ++} ++ + pub fn getSymbolName(self: Object, index: u32) []const u8 { + const strtab = self.in_strtab.?; + const sym = self.symtab[index]; +diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig +index 817aa816625..b42309598d7 100644 +--- a/src/link/MachO/ZldAtom.zig ++++ b/src/link/MachO/ZldAtom.zig +@@ -465,7 +465,7 @@ pub fn resolveRelocs( + zld: *Zld, + atom_index: AtomIndex, + atom_code: []u8, +- atom_relocs: []align(1) const macho.relocation_info, ++ atom_relocs: []const macho.relocation_info, + reverse_lookup: []u32, + ) !void { + const arch = zld.options.target.cpu.arch; +@@ -540,7 +540,7 @@ fn resolveRelocsArm64( + zld: *Zld, + atom_index: AtomIndex, + atom_code: []u8, +- atom_relocs: []align(1) const macho.relocation_info, ++ atom_relocs: []const macho.relocation_info, + reverse_lookup: []u32, + context: RelocContext, + ) !void { +@@ -579,7 +579,6 @@ fn resolveRelocsArm64( + } + + const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup); +- const rel_offset = @intCast(u32, rel.r_address - context.base_offset); + + log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ + @tagName(rel_type), +@@ -589,6 +588,7 @@ fn resolveRelocsArm64( + target.file, + }); + ++ const rel_offset = @intCast(u32, rel.r_address - context.base_offset); + const source_addr = blk: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; +@@ -596,7 +596,7 @@ fn resolveRelocsArm64( + const is_tlv = is_tlv: { + const source_sym = 
zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; +- break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; ++ break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; + const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); + +@@ -831,7 +831,7 @@ fn resolveRelocsX86( + zld: *Zld, + atom_index: AtomIndex, + atom_code: []u8, +- atom_relocs: []align(1) const macho.relocation_info, ++ atom_relocs: []const macho.relocation_info, + reverse_lookup: []u32, + context: RelocContext, + ) !void { +@@ -877,7 +877,7 @@ fn resolveRelocsX86( + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; +- break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; ++ break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); +@@ -1015,27 +1015,24 @@ pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 { + return code[offset..][0..code_len]; + } + +-pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []align(1) const macho.relocation_info { ++pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []const macho.relocation_info { + const atom = zld.getAtomPtr(atom_index); + assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. 
+ const object = zld.objects.items[atom.getFile().?]; + +- const source_sect = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { +- const source_sect = object.getSourceSection(source_sym.n_sect - 1); +- assert(!source_sect.isZerofill()); +- break :blk source_sect; ++ const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { ++ break :blk source_sym.n_sect - 1; + } else blk: { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. + const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); +- const source_sect = object.getSourceSection(sect_id); +- assert(!source_sect.isZerofill()); +- break :blk source_sect; ++ break :blk sect_id; + }; +- +- const relocs = object.getRelocs(source_sect); ++ const source_sect = object.getSourceSection(source_sect_id); ++ assert(!source_sect.isZerofill()); ++ const relocs = object.getRelocs(source_sect_id); + + if (atom.cached_relocs_start == -1) { + const indexes = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { +diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig +index 3a2ea79c6ec..cee3f302c08 100644 +--- a/src/link/MachO/zld.zig ++++ b/src/link/MachO/zld.zig +@@ -396,7 +396,7 @@ pub const Zld = struct { + break :blk null; + } + +- switch (sect.@"type"()) { ++ switch (sect.type()) { + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, +@@ -1701,7 +1701,7 @@ pub const Zld = struct { + break :outer; + } + } +- switch (header.@"type"()) { ++ switch (header.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS => { + try self.writeGotPointer(count, buffer.writer()); + }, +@@ -1718,7 +1718,7 @@ pub const Zld = struct { + break :outer; + } + } +- if (header.@"type"() == macho.S_SYMBOL_STUBS) { ++ if (header.type() == macho.S_SYMBOL_STUBS) { + try 
self.writeStubCode(atom_index, count, buffer.writer()); + } else if (mem.eql(u8, header.sectName(), "__stub_helper")) { + try self.writeStubHelperCode(atom_index, buffer.writer()); +@@ -1802,7 +1802,7 @@ pub const Zld = struct { + for (slice.items(.header)) |*header, sect_id| { + if (header.size == 0) continue; + if (self.requiresThunks()) { +- if (header.isCode() and !(header.@"type"() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; ++ if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; + } + + var atom_index = slice.items(.first_atom_index)[sect_id]; +@@ -1830,7 +1830,7 @@ pub const Zld = struct { + if (self.requiresThunks()) { + for (slice.items(.header)) |header, sect_id| { + if (!header.isCode()) continue; +- if (header.@"type"() == macho.S_SYMBOL_STUBS) continue; ++ if (header.type() == macho.S_SYMBOL_STUBS) continue; + if (mem.eql(u8, header.sectName(), "__stub_helper")) continue; + + // Create jump/branch range extenders if needed. +@@ -1994,10 +1994,10 @@ pub const Zld = struct { + const section_precedence: u4 = blk: { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; +- if (header.@"type"() == macho.S_SYMBOL_STUBS) break :blk 0x1; ++ if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; + break :blk 0x2; + } +- switch (header.@"type"()) { ++ switch (header.type()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => break :blk 0x0, +@@ -2121,7 +2121,7 @@ pub const Zld = struct { + + // Finally, unpack the rest. + for (slice.items(.header)) |header, sect_id| { +- switch (header.@"type"()) { ++ switch (header.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, +@@ -2252,7 +2252,7 @@ pub const Zld = struct { + // Finally, unpack the rest. 
+ const slice = self.sections.slice(); + for (slice.items(.header)) |header, sect_id| { +- switch (header.@"type"()) { ++ switch (header.type()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, +@@ -2707,10 +2707,10 @@ pub const Zld = struct { + } + + fn filterDataInCode( +- dices: []const macho.data_in_code_entry, ++ dices: []align(1) const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +- ) []const macho.data_in_code_entry { ++ ) []align(1) const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index a0d1e76e1c350..62a5de4e4dfa7 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -24198,8 +24198,7 @@ with pkgs; zig_0_10 = darwin.apple_sdk_11_0.callPackage ../development/compilers/zig/0.10.nix { llvmPackages = llvmPackages_15; }; - # Zig 0.10.1 is broken on Darwin, so use 0.9.1 on Darwin instead. - zig = if stdenv.isDarwin then zig_0_9 else zig_0_10; + zig = zig_0_10; zimlib = callPackage ../development/libraries/zimlib { };