From 58ab4a0064641ca5a27b0998ee7d0e33f9c677cd Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 28 Apr 2014 17:17:18 -0700 Subject: [PATCH] rustc: Enable -f{function,data}-sections The compiler has previously been producing binaries on the order of 1.8MB for hello world programs "fn main() {}". This is largely a result of the compilation model, which compiles each entire library into a single object file, and because static linking is favored by default. When linking, linkers will pull in the entire contents of an object file if any symbol from the object file is used. This means that if any symbol from a rust library is used, the entire library is pulled in unconditionally, regardless of how much of the library is actually used. Traditional C/C++ projects do not normally encounter these large executable problems because their archives (rust's rlibs) are composed of many objects. Because of this, linkers can eliminate entire objects from being in the final executable. With rustc, however, the linker does not have the opportunity to leave out entire object files. In order to get similar benefits from dead code stripping at link time, this commit enables the -ffunction-sections and -fdata-sections flags in LLVM, as well as passing --gc-sections to the linker *by default*. This means that each function and each global will be placed into its own section, allowing the linker to GC all unused functions and data symbols. By enabling these flags, rust is able to generate much smaller binaries by default. On linux, a hello world binary went from 1.8MB to 597K (a 67% reduction in size). 
The output size of dynamic libraries remained constant, but the output size of rlibs increased, as seen below: libarena - 2.27% bigger ( 292872 => 299508) libcollections - 0.64% bigger ( 6765884 => 6809076) libflate - 0.83% bigger ( 186516 => 188060) libfourcc - 14.71% bigger ( 307290 => 352498) libgetopts - 4.42% bigger ( 761468 => 795102) libglob - 2.73% bigger ( 899932 => 924542) libgreen - 9.63% bigger ( 1281718 => 1405124) libhexfloat - 13.88% bigger ( 333738 => 380060) liblibc - 10.79% bigger ( 551280 => 610736) liblog - 10.93% bigger ( 218208 => 242060) libnative - 8.26% bigger ( 1362096 => 1474658) libnum - 2.34% bigger ( 2583400 => 2643916) librand - 1.72% bigger ( 1608684 => 1636394) libregex - 6.50% bigger ( 1747768 => 1861398) librustc - 4.21% bigger (151820192 => 158218924) librustdoc - 8.96% bigger ( 13142604 => 14320544) librustuv - 4.13% bigger ( 4366896 => 4547304) libsemver - 2.66% bigger ( 396166 => 406686) libserialize - 1.91% bigger ( 6878396 => 7009822) libstd - 3.59% bigger ( 39485286 => 40902218) libsync - 3.95% bigger ( 1386390 => 1441204) libsyntax - 4.96% bigger ( 35757202 => 37530798) libterm - 13.99% bigger ( 924580 => 1053902) libtest - 6.04% bigger ( 2455720 => 2604092) libtime - 2.84% bigger ( 1075708 => 1106242) liburl - 6.53% bigger ( 590458 => 629004) libuuid - 4.63% bigger ( 326350 => 341466) libworkcache - 8.45% bigger ( 1230702 => 1334750) This increase in size is a result of encoding many more section names into each object file (rlib). These increases are moderate enough that this change seems worthwhile to me, due to the drastic improvements seen in the final artifacts. The overall increase of the stage2 target folder (not the size of an install) went from 337MB to 348MB (3% increase). Additionally, linking is generally slower when executed with all these new sections plus the --gc-sections flag. The stage0 compiler takes 1.4s to link the `rustc` binary, whereas the stage1 compiler takes 1.9s to link the binary. 
Three megabytes are shaved off the binary. I found this increase in link time to be acceptable relative to the benefits of code size gained. This commit only enables --gc-sections for *executables*, not dynamic libraries. LLVM does all the heavy lifting when producing an object file for a dynamic library, so there is little else for the linker to do (remember that we only have one object file). I conducted similar experiments by putting a *module's* functions and data symbols into its own section (granularity moved to a module level instead of a function/static level). The size benefits of a hello world were seen to be on the order of 400K rather than 1.2MB. It seemed that enough benefit was gained using ffunction-sections that this route was less desirable, despite the lesser increases in binary rlib size. --- src/librustc/back/link.rs | 26 ++++++++++++++----- src/librustc/lib/llvm.rs | 4 ++- src/rustllvm/PassWrapper.cpp | 6 ++++- .../basic-types-globals-metadata.rs | 2 ++ src/test/debug-info/basic-types-globals.rs | 2 ++ src/test/debug-info/basic-types-metadata.rs | 1 + src/test/debug-info/c-style-enum.rs | 4 +++ src/test/debug-info/limited-debuginfo.rs | 1 + 8 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/librustc/back/link.rs b/src/librustc/back/link.rs index 44fb8dbb4ce5c..3669ba0c2f5cc 100644 --- a/src/librustc/back/link.rs +++ b/src/librustc/back/link.rs @@ -152,6 +152,12 @@ pub mod write { (sess.targ_cfg.os == abi::OsMacos && sess.targ_cfg.arch == abi::X86_64); + // OSX has -dead_strip, which doesn't rely on ffunction_sections + // FIXME(#13846) this should be enabled for windows + let ffunction_sections = sess.targ_cfg.os != abi::OsMacos && + sess.targ_cfg.os != abi::OsWin32; + let fdata_sections = ffunction_sections; + let reloc_model = match sess.opts.cg.relocation_model.as_slice() { "pic" => lib::llvm::RelocPIC, "static" => lib::llvm::RelocStatic, @@ -173,9 +179,11 @@ pub mod write { lib::llvm::CodeModelDefault, reloc_model, 
opt_level, - true, + true /* EnableSegstk */, use_softfp, - no_fp_elim + no_fp_elim, + ffunction_sections, + fdata_sections, ) }) }) @@ -1136,16 +1144,22 @@ fn link_args(sess: &Session, args.push("-nodefaultlibs".to_owned()); } + // If we're building a dylib, we don't use --gc-sections because LLVM has + // already done the best it can do, and we also don't want to eliminate the + // metadata. If we're building an executable, however, --gc-sections drops + // the size of hello world from 1.8MB to 597K, a 67% reduction. + if !dylib && sess.targ_cfg.os != abi::OsMacos { + args.push("-Wl,--gc-sections".to_owned()); + } + if sess.targ_cfg.os == abi::OsLinux { // GNU-style linkers will use this to omit linking to libraries which // don't actually fulfill any relocations, but only for libraries which // follow this flag. Thus, use it before specifying libraries to link to. args.push("-Wl,--as-needed".to_owned()); - // GNU-style linkers support optimization with -O. --gc-sections - // removes metadata and potentially other useful things, so don't - // include it. GNU ld doesn't need a numeric argument, but other linkers - // do. + // GNU-style linkers support optimization with -O. GNU ld doesn't need a + // numeric argument, but other linkers do. 
if sess.opts.optimize == session::Default || sess.opts.optimize == session::Aggressive { args.push("-Wl,-O1".to_owned()); diff --git a/src/librustc/lib/llvm.rs b/src/librustc/lib/llvm.rs index ba7c50547947e..74e332a8db456 100644 --- a/src/librustc/lib/llvm.rs +++ b/src/librustc/lib/llvm.rs @@ -1748,7 +1748,9 @@ pub mod llvm { Level: CodeGenOptLevel, EnableSegstk: bool, UseSoftFP: bool, - NoFramePointerElim: bool) -> TargetMachineRef; + NoFramePointerElim: bool, + FunctionSections: bool, + DataSections: bool) -> TargetMachineRef; pub fn LLVMRustDisposeTargetMachine(T: TargetMachineRef); pub fn LLVMRustAddAnalysisPasses(T: TargetMachineRef, PM: PassManagerRef, diff --git a/src/rustllvm/PassWrapper.cpp b/src/rustllvm/PassWrapper.cpp index 021dda4976550..1031f3c1570e9 100644 --- a/src/rustllvm/PassWrapper.cpp +++ b/src/rustllvm/PassWrapper.cpp @@ -69,7 +69,9 @@ LLVMRustCreateTargetMachine(const char *triple, CodeGenOpt::Level OptLevel, bool EnableSegmentedStacks, bool UseSoftFloat, - bool NoFramePointerElim) { + bool NoFramePointerElim, + bool FunctionSections, + bool DataSections) { std::string Error; Triple Trip(Triple::normalize(triple)); const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Trip.getTriple(), @@ -97,6 +99,8 @@ LLVMRustCreateTargetMachine(const char *triple, RM, CM, OptLevel); + TM->setDataSections(DataSections); + TM->setFunctionSections(FunctionSections); return wrap(TM); } diff --git a/src/test/debug-info/basic-types-globals-metadata.rs b/src/test/debug-info/basic-types-globals-metadata.rs index efc6fda26e0c0..bf6d63f2ca92f 100644 --- a/src/test/debug-info/basic-types-globals-metadata.rs +++ b/src/test/debug-info/basic-types-globals-metadata.rs @@ -66,6 +66,8 @@ static F64: f64 = 3.5; fn main() { _zzz(); + + let a = (B, I, C, I8, I16, I32, I64, U, U8, U16, U32, U64, F32, F64); } fn _zzz() {()} diff --git a/src/test/debug-info/basic-types-globals.rs b/src/test/debug-info/basic-types-globals.rs index 2a620e96892a7..cdc3132ca60b2 100644 --- 
a/src/test/debug-info/basic-types-globals.rs +++ b/src/test/debug-info/basic-types-globals.rs @@ -70,6 +70,8 @@ static F64: f64 = 3.5; fn main() { _zzz(); + + let a = (B, I, C, I8, I16, I32, I64, U, U8, U16, U32, U64, F32, F64); } fn _zzz() {()} diff --git a/src/test/debug-info/basic-types-metadata.rs b/src/test/debug-info/basic-types-metadata.rs index e06af6445ccfb..ae5d3a84b966a 100644 --- a/src/test/debug-info/basic-types-metadata.rs +++ b/src/test/debug-info/basic-types-metadata.rs @@ -67,6 +67,7 @@ fn main() { let f32: f32 = 2.5; let f64: f64 = 3.5; _zzz(); + if 1 == 1 { _yyy(); } } fn _zzz() {()} diff --git a/src/test/debug-info/c-style-enum.rs b/src/test/debug-info/c-style-enum.rs index b570110d8d013..0a207881bd3b3 100644 --- a/src/test/debug-info/c-style-enum.rs +++ b/src/test/debug-info/c-style-enum.rs @@ -121,6 +121,10 @@ fn main() { }; zzz(); + + let a = SINGLE_VARIANT; + let a = unsafe { AUTO_ONE }; + let a = unsafe { MANUAL_ONE }; } fn zzz() {()} diff --git a/src/test/debug-info/limited-debuginfo.rs b/src/test/debug-info/limited-debuginfo.rs index 36ccc91e88b05..51d9aa75e078e 100644 --- a/src/test/debug-info/limited-debuginfo.rs +++ b/src/test/debug-info/limited-debuginfo.rs @@ -38,6 +38,7 @@ struct Struct { fn main() { some_function(101, 202); + some_other_function(1, 2); }