diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index 166454d3ae74c..fa67a1b331011 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -1385,7 +1385,7 @@ fn vcall_visibility_metadata<'ll, 'tcx>( let trait_def_id = trait_ref_self.def_id(); let trait_vis = cx.tcx.visibility(trait_def_id); - let cgus = cx.sess().codegen_units(); + let cgus = cx.sess().codegen_units().as_usize(); let single_cgu = cgus == 1; let lto = cx.sess().lto(); diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 9be69e560e888..109e9959aeac8 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -646,10 +646,10 @@ fn produce_final_output_artifacts( // rlib. let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe); - let keep_numbered_bitcode = user_wants_bitcode && sess.codegen_units() > 1; + let keep_numbered_bitcode = user_wants_bitcode && sess.codegen_units().as_usize() > 1; let keep_numbered_objects = - needs_crate_object || (user_wants_objects && sess.codegen_units() > 1); + needs_crate_object || (user_wants_objects && sess.codegen_units().as_usize() > 1); for module in compiled_modules.modules.iter() { if let Some(ref path) = module.object { @@ -1923,7 +1923,7 @@ impl OngoingCodegen { // FIXME: time_llvm_passes support - does this use a global context or // something? 
- if sess.codegen_units() == 1 && sess.opts.unstable_opts.time_llvm_passes { + if sess.codegen_units().as_usize() == 1 && sess.opts.unstable_opts.time_llvm_passes { self.backend.print_pass_timings() } diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index f0c9605da1d13..ebcc3b0399973 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -113,6 +113,7 @@ use rustc_middle::query::Providers; use rustc_middle::ty::print::{characteristic_def_id_of_type, with_no_trimmed_paths}; use rustc_middle::ty::{self, visit::TypeVisitableExt, InstanceDef, TyCtxt}; use rustc_session::config::{DumpMonoStatsFormat, SwitchWithOptPath}; +use rustc_session::CodegenUnits; use rustc_span::symbol::Symbol; use crate::collector::UsageMap; @@ -322,7 +323,7 @@ fn merge_codegen_units<'tcx>( cx: &PartitioningCx<'_, 'tcx>, codegen_units: &mut Vec<CodegenUnit<'tcx>>, ) { - assert!(cx.tcx.sess.codegen_units() >= 1); + assert!(cx.tcx.sess.codegen_units().as_usize() >= 1); // A sorted order here ensures merging is deterministic. assert!(codegen_units.is_sorted_by(|a, b| Some(a.name().as_str().cmp(b.name().as_str())))); @@ -331,11 +332,32 @@ fn merge_codegen_units<'tcx>( let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> = codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect(); - // Merge the two smallest codegen units until the target size is - // reached. - while codegen_units.len() > cx.tcx.sess.codegen_units() { - // Sort small cgus to the back + // Having multiple CGUs can drastically speed up compilation. But for + // non-incremental builds, tiny CGUs slow down compilation *and* result in + // worse generated code. So we don't allow CGUs smaller than this (unless + // there is just one CGU, of course). Note that CGU sizes of 100,000+ are + // common in larger programs, so this isn't all that large. 
+ const NON_INCR_MIN_CGU_SIZE: usize = 1000; + + // Repeatedly merge the two smallest codegen units as long as: + // - we have more CGUs than the upper limit, or + // - (Non-incremental builds only) the user didn't specify a CGU count, and + // there are multiple CGUs, and some are below the minimum size. + // + // The "didn't specify a CGU count" condition is because when an explicit + // count is requested we observe it as closely as possible. For example, + // the `compiler_builtins` crate sets `codegen-units = 10000` and it's + // critical they aren't merged. Also, some tests use explicit small values + // and likewise won't work if small CGUs are merged. + while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize() + || (cx.tcx.sess.opts.incremental.is_none() + && matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_)) + && codegen_units.len() > 1 + && codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE)) + { + // Sort small cgus to the back. codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate())); + let mut smallest = codegen_units.pop().unwrap(); let second_smallest = codegen_units.last_mut().unwrap(); @@ -918,9 +940,13 @@ fn debug_dump<'a, 'tcx: 'a>( let symbol_hash_start = symbol_name.rfind('h'); let symbol_hash = symbol_hash_start.map_or("", |i| &symbol_name[i..]); let size = item.size_estimate(tcx); + let kind = match item.instantiation_mode(tcx) { + InstantiationMode::GloballyShared { .. 
} => "root", + InstantiationMode::LocalCopy => "inlined", + }; let _ = with_no_trimmed_paths!(writeln!( s, - " - {item} [{linkage:?}] [{symbol_hash}] (size={size})" + " - {item} [{linkage:?}] [{symbol_hash}] ({kind}, size: {size})" )); } diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index 2cc02003218ee..5feea83edb6a3 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -234,6 +234,27 @@ pub enum MetadataKind { Compressed, } +#[derive(Clone, Copy)] +pub enum CodegenUnits { + /// Specified by the user. In this case we try fairly hard to produce the + /// number of CGUs requested. + User(usize), + + /// A default value, i.e. not specified by the user. In this case we take + /// more liberties about CGU formation, e.g. avoid producing very small + /// CGUs. + Default(usize), +} + +impl CodegenUnits { + pub fn as_usize(self) -> usize { + match self { + CodegenUnits::User(n) => n, + CodegenUnits::Default(n) => n, + } + } +} + impl Session { pub fn miri_unleashed_feature(&self, span: Span, feature_gate: Option<Symbol>) { self.miri_unleashed_features.lock().push((span, feature_gate)); @@ -1104,7 +1125,7 @@ impl Session { // If there's only one codegen unit and LTO isn't enabled then there's // no need for ThinLTO so just return false. 
- if self.codegen_units() == 1 { + if self.codegen_units().as_usize() == 1 { return config::Lto::No; } @@ -1206,19 +1227,19 @@ impl Session { /// Returns the number of codegen units that should be used for this /// compilation - pub fn codegen_units(&self) -> usize { + pub fn codegen_units(&self) -> CodegenUnits { if let Some(n) = self.opts.cli_forced_codegen_units { - return n; + return CodegenUnits::User(n); } if let Some(n) = self.target.default_codegen_units { - return n as usize; + return CodegenUnits::Default(n as usize); } // If incremental compilation is turned on, we default to a high number // codegen units in order to reduce the "collateral damage" small // changes cause. if self.opts.incremental.is_some() { - return 256; + return CodegenUnits::Default(256); } // Why is 16 codegen units the default all the time? @@ -1271,7 +1292,7 @@ impl Session { // As a result 16 was chosen here! Mostly because it was a power of 2 // and most benchmarks agreed it was roughly a local optimum. Not very // scientific. - 16 + CodegenUnits::Default(16) } pub fn teach(&self, code: &DiagnosticId) -> bool { diff --git a/src/doc/rustc/src/codegen-options/index.md b/src/doc/rustc/src/codegen-options/index.md index 1041d5026690f..8de638dde4fbf 100644 --- a/src/doc/rustc/src/codegen-options/index.md +++ b/src/doc/rustc/src/codegen-options/index.md @@ -31,8 +31,8 @@ Supported values can also be discovered by running `rustc --print code-models`. ## codegen-units -This flag controls how many code generation units the crate is split into. It -takes an integer greater than 0. +This flag controls the maximum number of code generation units the crate is +split into. It takes an integer greater than 0. When a crate is split into multiple codegen units, LLVM is able to process them in parallel. Increasing parallelism may speed up compile times, but may