From 649e80184bf238760a2162c6f93090c4ed6abae8 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 14 Apr 2024 13:52:56 -0400 Subject: [PATCH 1/2] Codegen ZSTs without an allocation This makes sure that &[] is just as efficient as indirecting through unsafe code (from_raw_parts). No new stable guarantee is intended about whether or not we do this, this is just an optimization. Co-authored-by: Ralf Jung --- compiler/rustc_codegen_llvm/src/common.rs | 49 +++++++++++++++-------- compiler/rustc_codegen_llvm/src/consts.rs | 18 ++++++++- tests/ui/consts/zst_no_llvm_alloc.rs | 21 ++++++---- 3 files changed, 62 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index 568fcc3f3cf49..ec33ce6292acc 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -255,21 +255,38 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> { let (prov, offset) = ptr.into_parts(); let (base_addr, base_addr_space) = match self.tcx.global_alloc(prov.alloc_id()) { GlobalAlloc::Memory(alloc) => { - let init = const_alloc_to_llvm(self, alloc); - let alloc = alloc.inner(); - let value = match alloc.mutability { - Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None), - _ => self.static_addr_of(init, alloc.align, None), - }; - if !self.sess().fewer_names() && llvm::get_value_name(value).is_empty() { - let hash = self.tcx.with_stable_hashing_context(|mut hcx| { - let mut hasher = StableHasher::new(); - alloc.hash_stable(&mut hcx, &mut hasher); - hasher.finish::() - }); - llvm::set_value_name(value, format!("alloc_{hash:032x}").as_bytes()); + // For ZSTs directly codegen an aligned pointer. + // This avoids generating a zero-sized constant value and actually needing a + // real address at runtime. + if alloc.inner().len() == 0 { + assert_eq!(offset.bytes(), 0); + let llval = self.const_usize(alloc.inner().align.bytes()); + return if matches!(layout.primitive(), Pointer(_)) { + unsafe { llvm::LLVMConstIntToPtr(llval, llty) } + } else { + self.const_bitcast(llval, llty) + }; + } else { + let init = const_alloc_to_llvm(self, alloc, /*static*/ false); + let alloc = alloc.inner(); + let value = match alloc.mutability { + Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None), + _ => self.static_addr_of(init, alloc.align, None), + }; + if !self.sess().fewer_names() && llvm::get_value_name(value).is_empty() + { + let hash = self.tcx.with_stable_hashing_context(|mut hcx| { + let mut hasher = StableHasher::new(); + alloc.hash_stable(&mut hcx, &mut hasher); + hasher.finish::() + }); + llvm::set_value_name( + value, + format!("alloc_{hash:032x}").as_bytes(), + ); + } + (value, AddressSpace::DATA) } - (value, AddressSpace::DATA) } GlobalAlloc::Function(fn_instance) => ( self.get_fn_addr(fn_instance.polymorphize(self.tcx)), @@ -280,7 +297,7 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> { .tcx .global_alloc(self.tcx.vtable_allocation((ty, trait_ref))) .unwrap_memory(); - let init = const_alloc_to_llvm(self, alloc); + let init = const_alloc_to_llvm(self, alloc, /*static*/ false); let value = self.static_addr_of(init, alloc.inner().align, None); (value, AddressSpace::DATA) } @@ -308,7 +325,7 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> { } fn const_data_from_alloc(&self, alloc: ConstAllocation<'tcx>) -> Self::Value { - const_alloc_to_llvm(self, alloc) + const_alloc_to_llvm(self, alloc, /*static*/ false) } fn const_bitcast(&self, val: &'ll Value, ty: &'ll Type) -> &'ll Value { diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index 4afa230e598b7..a62dfe132047e 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -26,8 +26,22 @@ use rustc_target::abi::{ }; use std::ops::Range; -pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<'_>) -> &'ll Value { +pub fn const_alloc_to_llvm<'ll>( + cx: &CodegenCx<'ll, '_>, + alloc: ConstAllocation<'_>, + is_static: bool, +) -> &'ll Value { let alloc = alloc.inner(); + // We expect that callers of const_alloc_to_llvm will instead directly codegen a pointer or + // integer for any &ZST where the ZST is a constant (i.e. not a static). We should never be + // producing empty LLVM allocations as they're just adding noise to binaries and forcing less + // optimal codegen. + // + // Statics have a guaranteed meaningful address so it's less clear that we want to do + // something like this; it's also harder. + if !is_static { + assert!(alloc.len() != 0); + } let mut llvals = Vec::with_capacity(alloc.provenance().ptrs().len() + 1); let dl = cx.data_layout(); let pointer_size = dl.pointer_size.bytes() as usize; @@ -120,7 +134,7 @@ fn codegen_static_initializer<'ll, 'tcx>( def_id: DefId, ) -> Result<(&'ll Value, ConstAllocation<'tcx>), ErrorHandled> { let alloc = cx.tcx.eval_static_initializer(def_id)?; - Ok((const_alloc_to_llvm(cx, alloc), alloc)) + Ok((const_alloc_to_llvm(cx, alloc, /*static*/ true), alloc)) } fn set_global_alignment<'ll>(cx: &CodegenCx<'ll, '_>, gv: &'ll Value, mut align: Align) { diff --git a/tests/ui/consts/zst_no_llvm_alloc.rs b/tests/ui/consts/zst_no_llvm_alloc.rs index 1622a199a7a40..48ef11e2b58cf 100644 --- a/tests/ui/consts/zst_no_llvm_alloc.rs +++ b/tests/ui/consts/zst_no_llvm_alloc.rs @@ -6,16 +6,21 @@ struct Foo; static FOO: Foo = Foo; fn main() { + // There's no stable guarantee that these are true. + // However, we want them to be true so that our LLVM IR and runtime are a bit faster: + // a constant address is cheap and doesn't result in relocations in comparison to a "real" + // global somewhere in the data section. let x: &'static () = &(); - assert_ne!(x as *const () as usize, 1); + assert_eq!(x as *const () as usize, 1); let x: &'static Foo = &Foo; - assert_ne!(x as *const Foo as usize, 4); + assert_eq!(x as *const Foo as usize, 4); - // statics must have a unique address - assert_ne!(&FOO as *const Foo as usize, 4); + // The exact addresses returned by these library functions are not necessarily stable guarantees + // but for now we assert that we're still matching. + assert_eq!(>::new().as_ptr(), <&[i32]>::default().as_ptr()); + assert_eq!(>::default().as_ptr(), (&[]).as_ptr()); - // FIXME this two tests should be assert_eq! - // this stopped working since we are promoting to constants instead of statics - assert_ne!(>::new().as_ptr(), <&[i32]>::default().as_ptr()); - assert_ne!(>::default().as_ptr(), (&[]).as_ptr()); + // statics must have a unique address (see https://github.com/rust-lang/rust/issues/18297, not + // clear whether this is a stable guarantee) + assert_ne!(&FOO as *const Foo as usize, 4); } From 9ab6e36d8d1cad691a9078921a8ce07de5d4de02 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 14 Apr 2024 16:39:08 -0400 Subject: [PATCH 2/2] Fix broken test Testing for ASLR by casting &ZST to *const _ is not useful, there's no guarantee that &ZST produces an ASLR'd pointer. --- tests/run-make/static-pie/test-aslr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/run-make/static-pie/test-aslr.rs b/tests/run-make/static-pie/test-aslr.rs index 96b17af46dfe9..8d8e7586323a9 100644 --- a/tests/run-make/static-pie/test-aslr.rs +++ b/tests/run-make/static-pie/test-aslr.rs @@ -17,7 +17,7 @@ fn main() { let arg0 = args.next().unwrap(); match args.next() { Some(s) if s.eq("--report") => { - println!("main = {:#?}", &main as *const _); + println!("main = {:#?}", main as fn() as usize); } Some(s) if s.eq("--test-no-aslr") => { let cnt = run_self(&arg0);