diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index 883f82caa80d1..59c075a3d3e04 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -534,7 +534,7 @@ fn hex_encode(data: &[u8]) -> String { } pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile { - let cache_key = Some((source_file.name_hash, source_file.src_hash)); + let cache_key = Some((source_file.stable_id, source_file.src_hash)); return debug_context(cx) .created_files .borrow_mut() diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs index 31631e8a86495..d3a851b40c0a2 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs @@ -1,6 +1,7 @@ #![doc = include_str!("doc.md")] use rustc_codegen_ssa::mir::debuginfo::VariableKind::*; +use rustc_data_structures::unord::UnordMap; use self::metadata::{file_metadata, type_di_node}; use self::metadata::{UNKNOWN_COLUMN_NUMBER, UNKNOWN_LINE_NUMBER}; @@ -20,8 +21,6 @@ use crate::value::Value; use rustc_codegen_ssa::debuginfo::type_names; use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind}; use rustc_codegen_ssa::traits::*; -use rustc_data_structures::fx::FxHashMap; -use rustc_data_structures::stable_hasher::Hash128; use rustc_data_structures::sync::Lrc; use rustc_hir::def_id::{DefId, DefIdMap}; use rustc_index::IndexVec; @@ -32,7 +31,9 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeVisitableExt}; use rustc_session::config::{self, DebugInfo}; use rustc_session::Session; use rustc_span::symbol::Symbol; -use rustc_span::{BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span}; +use rustc_span::{ + BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, +}; use 
rustc_target::abi::Size; use libc::c_uint; @@ -61,7 +62,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> { llcontext: &'ll llvm::Context, llmod: &'ll llvm::Module, builder: &'ll mut DIBuilder<'ll>, - created_files: RefCell, &'ll DIFile>>, + created_files: RefCell, &'ll DIFile>>, type_map: metadata::TypeMap<'ll, 'tcx>, namespace_map: RefCell>, diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index aa1ee96fae2ac..55daf441a751d 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -1671,7 +1671,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, .. } = source_file_to_import; @@ -1716,7 +1716,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { let local_version = sess.source_map().new_imported_source_file( name, src_hash, - name_hash, + stable_id, source_len.to_u32(), self.cnum, lines, diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 16906fa698975..411a70f9f1bb3 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -5,7 +5,7 @@ use rustc_ast::Attribute; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::memmap::{Mmap, MmapMut}; -use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{join, par_for_each_in, Lrc}; use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_hir as hir; @@ -26,11 +26,12 @@ use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder}; use rustc_session::config::{CrateType, OptLevel}; use rustc_span::hygiene::HygieneEncodeContext; use rustc_span::symbol::sym; -use rustc_span::{ExternalSource, FileName, SourceFile, SpanData, SyntaxContext}; +use 
rustc_span::{ + ExternalSource, FileName, SourceFile, SpanData, StableSourceFileId, SyntaxContext, +}; use std::borrow::Borrow; use std::collections::hash_map::Entry; use std::fs::File; -use std::hash::Hash; use std::io::{Read, Seek, Write}; use std::path::{Path, PathBuf}; @@ -495,6 +496,8 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let mut adapted = TableBuilder::default(); + let local_crate_stable_id = self.tcx.stable_crate_id(LOCAL_CRATE); + // Only serialize `SourceFile`s that were used during the encoding of a `Span`. // // The order in which we encode source files is important here: the on-disk format for @@ -511,7 +514,9 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { // // At this point we also erase the actual on-disk path and only keep // the remapped version -- as is necessary for reproducible builds. - let mut source_file = match source_file.name { + let mut adapted_source_file = (**source_file).clone(); + + match source_file.name { FileName::Real(ref original_file_name) => { let adapted_file_name = if self.tcx.sess.should_prefer_remapped_for_codegen() { source_map.path_mapping().to_embeddable_absolute_path( @@ -525,22 +530,11 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { ) }; - if adapted_file_name != *original_file_name { - let mut adapted: SourceFile = (**source_file).clone(); - adapted.name = FileName::Real(adapted_file_name); - adapted.name_hash = { - let mut hasher: StableHasher = StableHasher::new(); - adapted.name.hash(&mut hasher); - hasher.finish::() - }; - Lrc::new(adapted) - } else { - // Nothing to adapt - source_file.clone() - } + adapted_source_file.name = FileName::Real(adapted_file_name); + } + _ => { + // expanded code, not from a file } - // expanded code, not from a file - _ => source_file.clone(), }; // We're serializing this `SourceFile` into our crate metadata, @@ -550,12 +544,20 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { // dependencies aren't loaded when we deserialize a proc-macro, // trying to remap the `CrateNum` would fail. 
if self.is_proc_macro { - Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE; + adapted_source_file.cnum = LOCAL_CRATE; } + // Update the `StableSourceFileId` to make sure it incorporates the + // id of the current crate. This way it will be unique within the + // crate graph during downstream compilation sessions. + adapted_source_file.stable_id = StableSourceFileId::from_filename_for_export( + &adapted_source_file.name, + local_crate_stable_id, + ); + let on_disk_index: u32 = on_disk_index.try_into().expect("cannot export more than U32_MAX files"); - adapted.set_some(on_disk_index, self.lazy(source_file)); + adapted.set_some(on_disk_index, self.lazy(adapted_source_file)); } adapted.encode(&mut self.opaque) diff --git a/compiler/rustc_middle/src/hir/map/mod.rs b/compiler/rustc_middle/src/hir/map/mod.rs index 81f34c7b8b0a9..1574f0f1b31fe 100644 --- a/compiler/rustc_middle/src/hir/map/mod.rs +++ b/compiler/rustc_middle/src/hir/map/mod.rs @@ -1098,7 +1098,7 @@ pub(super) fn crate_hash(tcx: TyCtxt<'_>, _: LocalCrate) -> Svh { .files() .iter() .filter(|source_file| source_file.cnum == LOCAL_CRATE) - .map(|source_file| source_file.name_hash) + .map(|source_file| source_file.stable_id) .collect(); source_file_names.sort_unstable(); diff --git a/compiler/rustc_middle/src/query/on_disk_cache.rs b/compiler/rustc_middle/src/query/on_disk_cache.rs index f37cfe8b0a111..0577d22d85086 100644 --- a/compiler/rustc_middle/src/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/query/on_disk_cache.rs @@ -1,6 +1,5 @@ use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_data_structures::memmap::Mmap; -use rustc_data_structures::stable_hasher::Hash64; use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, RwLock}; use rustc_data_structures::unhash::UnhashMap; use rustc_data_structures::unord::UnordSet; @@ -21,8 +20,10 @@ use rustc_session::Session; use rustc_span::hygiene::{ ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData, }; -use 
rustc_span::source_map::{SourceMap, StableSourceFileId}; -use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span}; +use rustc_span::source_map::SourceMap; +use rustc_span::{ + BytePos, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span, StableSourceFileId, +}; use rustc_span::{CachingSourceMapView, Symbol}; use std::collections::hash_map::Entry; use std::mem; @@ -133,30 +134,18 @@ impl AbsoluteBytePos { } } -/// An `EncodedSourceFileId` is the same as a `StableSourceFileId` except that -/// the source crate is represented as a [StableCrateId] instead of as a -/// `CrateNum`. This way `EncodedSourceFileId` can be encoded and decoded -/// without any additional context, i.e. with a simple `opaque::Decoder` (which -/// is the only thing available when decoding the cache's [Footer]. #[derive(Encodable, Decodable, Clone, Debug)] struct EncodedSourceFileId { - file_name_hash: Hash64, + stable_source_file_id: StableSourceFileId, stable_crate_id: StableCrateId, } impl EncodedSourceFileId { - #[inline] - fn translate(&self, tcx: TyCtxt<'_>) -> StableSourceFileId { - let cnum = tcx.stable_crate_id_to_crate_num(self.stable_crate_id); - StableSourceFileId { file_name_hash: self.file_name_hash, cnum } - } - #[inline] fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId { - let source_file_id = StableSourceFileId::new(file); EncodedSourceFileId { - file_name_hash: source_file_id.file_name_hash, - stable_crate_id: tcx.stable_crate_id(source_file_id.cnum), + stable_source_file_id: file.stable_id, + stable_crate_id: tcx.stable_crate_id(file.cnum), } } } @@ -488,7 +477,9 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> { .borrow_mut() .entry(index) .or_insert_with(|| { - let stable_id = file_index_to_stable_id[&index].translate(tcx); + let source_file_id = &file_index_to_stable_id[&index]; + let source_file_cnum = + tcx.stable_crate_id_to_crate_num(source_file_id.stable_crate_id); // If this `SourceFile` is from a foreign crate, then make 
sure // that we've imported all of the source files from that crate. @@ -499,12 +490,14 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> { // that we will load the source files from that crate during macro // expansion, so we use `import_source_files` to ensure that the foreign // source files are actually imported before we call `source_file_by_stable_id`. - if stable_id.cnum != LOCAL_CRATE { - self.tcx.cstore_untracked().import_source_files(self.tcx.sess, stable_id.cnum); + if source_file_cnum != LOCAL_CRATE { + self.tcx + .cstore_untracked() + .import_source_files(self.tcx.sess, source_file_cnum); } source_map - .source_file_by_stable_id(stable_id) + .source_file_by_stable_id(source_file_id.stable_source_file_id) .expect("failed to lookup `SourceFile` in new context") }) .clone() diff --git a/compiler/rustc_query_system/src/ich/impls_syntax.rs b/compiler/rustc_query_system/src/ich/impls_syntax.rs index f2387017c8278..d170cd36ca6aa 100644 --- a/compiler/rustc_query_system/src/ich/impls_syntax.rs +++ b/compiler/rustc_query_system/src/ich/impls_syntax.rs @@ -60,8 +60,8 @@ impl<'ctx> rustc_ast::HashStableContext for StableHashingContext<'ctx> { impl<'a> HashStable> for SourceFile { fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) { let SourceFile { - name: _, // We hash the smaller name_hash instead of this - name_hash, + name: _, // We hash the smaller stable_id instead of this + stable_id, cnum, // Do not hash the source as it is not encoded src: _, @@ -75,7 +75,7 @@ impl<'a> HashStable> for SourceFile { ref normalized_pos, } = *self; - name_hash.hash_stable(hcx, hasher); + stable_id.hash_stable(hcx, hasher); src_hash.hash_stable(hcx, hasher); diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index cc3f0962d6cda..8f64eed9a870d 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -58,7 +58,7 @@ pub use hygiene::{DesugaringKind, ExpnKind, MacroKind}; pub use hygiene::{ExpnData, 
ExpnHash, ExpnId, LocalExpnId, SyntaxContext}; use rustc_data_structures::stable_hasher::HashingControls; pub mod def_id; -use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE}; +use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, StableCrateId, LOCAL_CRATE}; pub mod edit_distance; mod span_encoding; pub use span_encoding::{Span, DUMMY_SP}; @@ -1333,8 +1333,10 @@ pub struct SourceFile { pub non_narrow_chars: Vec, /// Locations of characters removed during normalization. pub normalized_pos: Vec, - /// A hash of the filename, used for speeding up hashing in incremental compilation. - pub name_hash: Hash128, + /// A hash of the filename & crate-id, used for uniquely identifying source + /// files within the crate graph and for speeding up hashing in incremental + /// compilation. + pub stable_id: StableSourceFileId, /// Indicates which crate this `SourceFile` was imported from. pub cnum: CrateNum, } @@ -1352,7 +1354,7 @@ impl Clone for SourceFile { multibyte_chars: self.multibyte_chars.clone(), non_narrow_chars: self.non_narrow_chars.clone(), normalized_pos: self.normalized_pos.clone(), - name_hash: self.name_hash, + stable_id: self.stable_id, cnum: self.cnum, } } @@ -1426,7 +1428,7 @@ impl Encodable for SourceFile { self.multibyte_chars.encode(s); self.non_narrow_chars.encode(s); - self.name_hash.encode(s); + self.stable_id.encode(s); self.normalized_pos.encode(s); self.cnum.encode(s); } @@ -1453,7 +1455,7 @@ impl Decodable for SourceFile { }; let multibyte_chars: Vec = Decodable::decode(d); let non_narrow_chars: Vec = Decodable::decode(d); - let name_hash = Decodable::decode(d); + let stable_id = Decodable::decode(d); let normalized_pos: Vec = Decodable::decode(d); let cnum: CrateNum = Decodable::decode(d); SourceFile { @@ -1469,7 +1471,7 @@ impl Decodable for SourceFile { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, cnum, } } @@ -1481,6 +1483,66 @@ impl fmt::Debug for SourceFile { } } +/// This is a [SourceFile] 
identifier that is used to correlate source files between +/// subsequent compilation sessions (which is something we need to do during +/// incremental compilation). +/// +/// It is a hash value (so we can efficiently consume it when stable-hashing +/// spans) that consists of the `FileName` and the `StableCrateId` of the crate +/// the source file is from. The crate id is needed because sometimes the +/// `FileName` is not unique within the crate graph (think `src/lib.rs`, for +/// example). +/// +/// The way the crate-id part is handled is a bit special: source files of the +/// local crate are hashed as `(filename, None)`, while source files from +/// upstream crates have a hash of `(filename, Some(stable_crate_id))`. This +/// is because SourceFiles for the local crate are allocated very early in the +/// compilation process when the `StableCrateId` is not yet known. If, due to +/// some refactoring of the compiler, the `StableCrateId` of the local crate +/// were to become available, it would be better to uniformly make this a +/// hash of `(filename, stable_crate_id)`. +/// +/// When `SourceFile`s are exported in crate metadata, the `StableSourceFileId` +/// is updated to incorporate the `StableCrateId` of the exporting crate. 
+#[derive( + Debug, + Clone, + Copy, + Hash, + PartialEq, + Eq, + HashStable_Generic, + Encodable, + Decodable, + Default, + PartialOrd, + Ord +)] +pub struct StableSourceFileId(Hash128); + +impl StableSourceFileId { + fn from_filename_in_current_crate(filename: &FileName) -> Self { + Self::from_filename_and_stable_crate_id(filename, None) + } + + pub fn from_filename_for_export( + filename: &FileName, + local_crate_stable_crate_id: StableCrateId, + ) -> Self { + Self::from_filename_and_stable_crate_id(filename, Some(local_crate_stable_crate_id)) + } + + fn from_filename_and_stable_crate_id( + filename: &FileName, + stable_crate_id: Option, + ) -> Self { + let mut hasher = StableHasher::new(); + filename.hash(&mut hasher); + stable_crate_id.hash(&mut hasher); + StableSourceFileId(hasher.finish()) + } +} + impl SourceFile { pub fn new( name: FileName, @@ -1491,11 +1553,7 @@ impl SourceFile { let src_hash = SourceFileHash::new(hash_kind, &src); let normalized_pos = normalize_src(&mut src); - let name_hash = { - let mut hasher: StableHasher = StableHasher::new(); - name.hash(&mut hasher); - hasher.finish() - }; + let stable_id = StableSourceFileId::from_filename_in_current_crate(&name); let source_len = src.len(); let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?; @@ -1513,7 +1571,7 @@ impl SourceFile { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, cnum: LOCAL_CRATE, }) } @@ -2213,7 +2271,7 @@ where }; Hash::hash(&TAG_VALID_SPAN, hasher); - Hash::hash(&file.name_hash, hasher); + Hash::hash(&file.stable_id, hasher); // Hash both the length and the end location (line/column) of a span. 
If we // hash only the length, for example, then two otherwise equal spans with diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index cb10e6bf2ba81..c61dbcaae9541 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -13,7 +13,6 @@ use crate::*; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::{IntoDynSyncSend, MappedReadGuard, ReadGuard, RwLock}; use std::fs; -use std::hash::Hash; use std::io::{self, BorrowedBuf, Read}; use std::path::{self}; @@ -152,45 +151,6 @@ impl FileLoader for RealFileLoader { } } -/// This is a [SourceFile] identifier that is used to correlate source files between -/// subsequent compilation sessions (which is something we need to do during -/// incremental compilation). -/// -/// The [StableSourceFileId] also contains the CrateNum of the crate the source -/// file was originally parsed for. This way we get two separate entries in -/// the [SourceMap] if the same file is part of both the local and an upstream -/// crate. Trying to only have one entry for both cases is problematic because -/// at the point where we discover that there's a local use of the file in -/// addition to the upstream one, we might already have made decisions based on -/// the assumption that it's an upstream file. Treating the two files as -/// different has no real downsides. -#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)] -pub struct StableSourceFileId { - /// A hash of the source file's [`FileName`]. This is hash so that it's size - /// is more predictable than if we included the actual [`FileName`] value. - pub file_name_hash: Hash64, - - /// The [`CrateNum`] of the crate this source file was originally parsed for. - /// We cannot include this information in the hash because at the time - /// of hashing we don't have the context to map from the [`CrateNum`]'s numeric - /// value to a `StableCrateId`. 
- pub cnum: CrateNum, -} - -// FIXME: we need a more globally consistent approach to the problem solved by -// StableSourceFileId, perhaps built atop source_file.name_hash. -impl StableSourceFileId { - pub fn new(source_file: &SourceFile) -> StableSourceFileId { - StableSourceFileId::new_from_name(&source_file.name, source_file.cnum) - } - - fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId { - let mut hasher = StableHasher::new(); - name.hash(&mut hasher); - StableSourceFileId { file_name_hash: hasher.finish(), cnum } - } -} - // _____________________________________________________________________________ // SourceMap // @@ -320,17 +280,17 @@ impl SourceMap { // be empty, so the working directory will be used. let (filename, _) = self.path_mapping.map_filename_prefix(&filename); - let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE); - match self.source_file_by_stable_id(file_id) { + let stable_id = StableSourceFileId::from_filename_in_current_crate(&filename); + match self.source_file_by_stable_id(stable_id) { Some(lrc_sf) => Ok(lrc_sf), None => { let source_file = SourceFile::new(filename, src, self.hash_kind)?; // Let's make sure the file_id we generated above actually matches // the ID we generate for the SourceFile we just created. 
- debug_assert_eq!(StableSourceFileId::new(&source_file), file_id); + debug_assert_eq!(source_file.stable_id, stable_id); - self.register_source_file(file_id, source_file) + self.register_source_file(stable_id, source_file) } } } @@ -343,7 +303,7 @@ impl SourceMap { &self, filename: FileName, src_hash: SourceFileHash, - name_hash: Hash128, + stable_id: StableSourceFileId, source_len: u32, cnum: CrateNum, file_local_lines: FreezeLock, @@ -368,12 +328,11 @@ impl SourceMap { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, cnum, }; - let file_id = StableSourceFileId::new(&source_file); - self.register_source_file(file_id, source_file) + self.register_source_file(stable_id, source_file) .expect("not enough address space for imported source file") } diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index 113ca493d36db..130522a302ddf 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -234,14 +234,14 @@ fn t10() { multibyte_chars, non_narrow_chars, normalized_pos, - name_hash, + stable_id, .. } = (*src_file).clone(); let imported_src_file = sm.new_imported_source_file( name, src_hash, - name_hash, + stable_id, source_len.to_u32(), CrateNum::new(0), FreezeLock::new(lines.read().clone()),