From d9a54da89d799398e015202ac48108634b66d87e Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Sat, 6 Jul 2024 15:21:00 +0200 Subject: [PATCH] Only store absolute paths in `Files` --- crates/red_knot/src/main.rs | 4 +- crates/red_knot_module_resolver/src/db.rs | 6 +- .../red_knot_module_resolver/src/resolver.rs | 17 ++-- crates/ruff_db/src/files.rs | 35 ++++++-- crates/ruff_db/src/system.rs | 3 + crates/ruff_db/src/system/memory_fs.rs | 85 +++++++------------ crates/ruff_db/src/system/os.rs | 27 +++++- crates/ruff_db/src/system/path.rs | 84 ++++++++++++++++++ crates/ruff_db/src/system/test.rs | 15 +++- 9 files changed, 195 insertions(+), 81 deletions(-) diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs index dcc7eafa0a946f..06ef594482b6d0 100644 --- a/crates/red_knot/src/main.rs +++ b/crates/red_knot/src/main.rs @@ -35,7 +35,9 @@ pub fn main() -> anyhow::Result<()> { return Err(anyhow::anyhow!("Invalid arguments")); } - let system = OsSystem; + let cwd = std::env::current_dir().unwrap(); + let cwd = SystemPath::from_std_path(&cwd).unwrap(); + let system = OsSystem::new(cwd); let entry_point = SystemPath::new(&arguments[1]); if !system.path_exists(entry_point) { diff --git a/crates/red_knot_module_resolver/src/db.rs b/crates/red_knot_module_resolver/src/db.rs index 11eae4cfd685c3..d7a97e150664e7 100644 --- a/crates/red_knot_module_resolver/src/db.rs +++ b/crates/red_knot_module_resolver/src/db.rs @@ -189,9 +189,9 @@ pub(crate) mod tests { let db = TestDb::new(); - let src = SystemPathBuf::from("src"); - let site_packages = SystemPathBuf::from("site_packages"); - let custom_typeshed = SystemPathBuf::from("typeshed"); + let src = SystemPathBuf::from("/src"); + let site_packages = SystemPathBuf::from("/site_packages"); + let custom_typeshed = SystemPathBuf::from("/typeshed"); let fs = db.memory_file_system(); diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 56f1137925ca45..deff02e1d41632 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -387,7 +387,7 @@ impl PackageKind { #[cfg(test)] mod tests { use ruff_db::files::{system_path_to_file, File, FilePath}; - use ruff_db::system::{DbWithTestSystem, SystemPath}; + use ruff_db::system::DbWithTestSystem; use crate::db::tests::{create_resolver_builder, TestCase}; use crate::module::ModuleKind; @@ -826,6 +826,12 @@ mod tests { #[test] #[cfg(target_family = "unix")] fn symlink() -> anyhow::Result<()> { + use ruff_db::system::{OsSystem, SystemPath}; + + fn make_relative(path: &SystemPath) -> &SystemPath { + path.strip_prefix("/").unwrap_or(path) + } + let TestCase { mut db, src, @@ -833,14 +839,13 @@ mod tests { custom_typeshed, } = setup_resolver_test(); - db.use_os_system(); - let temp_dir = tempfile::tempdir()?; let root = SystemPath::from_std_path(temp_dir.path()).unwrap(); + db.use_os_system(OsSystem::new(root)); - let src = root.join(src); - let site_packages = root.join(site_packages); - let custom_typeshed = root.join(custom_typeshed); + let src = root.join(make_relative(&src)); + let site_packages = root.join(make_relative(&site_packages)); + let custom_typeshed = root.join(make_relative(&custom_typeshed)); let foo = src.join("foo.py"); let bar = src.join("bar.py"); diff --git a/crates/ruff_db/src/files.rs b/crates/ruff_db/src/files.rs index 8c5abac934893c..1650facdec4a99 100644 --- a/crates/ruff_db/src/files.rs +++ b/crates/ruff_db/src/files.rs @@ -60,17 +60,20 @@ impl Files { /// In these cases, a file with status [`FileStatus::Deleted`] is returned. #[tracing::instrument(level = "debug", skip(self, db))] fn system(&self, db: &dyn Db, path: &SystemPath) -> File { + let absolute = SystemPath::absolute(path, db.system().current_directory()); + let absolute = FilePath::System(absolute); + *self .inner .files_by_path - .entry(FilePath::System(path.to_path_buf())) + .entry(absolute.clone()) .or_insert_with(|| { let metadata = db.system().path_metadata(path); match metadata { Ok(metadata) if metadata.file_type().is_file() => File::new( db, - FilePath::System(path.to_path_buf()), + absolute, metadata.permissions(), metadata.revision(), FileStatus::Exists, @@ -78,7 +81,7 @@ impl Files { ), _ => File::new( db, - FilePath::System(path.to_path_buf()), + absolute, None, FileRevision::zero(), FileStatus::Deleted, @@ -89,10 +92,11 @@ impl Files { } /// Tries to look up the file for the given system path, returns `None` if no such file exists yet - fn try_system(&self, path: &SystemPath) -> Option { + fn try_system(&self, db: &dyn Db, path: &SystemPath) -> Option { + let absolute = SystemPath::absolute(path, db.system().current_directory()); self.inner .files_by_path - .get(&FilePath::System(path.to_path_buf())) + .get(&FilePath::System(absolute)) .map(|entry| *entry.value()) } @@ -224,7 +228,7 @@ impl File { _ => (FileStatus::Deleted, FileRevision::zero()), }; - let Some(file) = file.or_else(|| db.files().try_system(path)) else { + let Some(file) = file.or_else(|| db.files().try_system(db, path)) else { return; }; @@ -260,7 +264,7 @@ mod tests { use crate::vendored::tests::VendoredFileSystemBuilder; #[test] - fn file_system_existing_file() -> crate::system::Result<()> { + fn system_existing_file() -> crate::system::Result<()> { let mut db = TestDb::new(); db.write_file("test.py", "print('Hello world')")?; @@ -275,7 +279,7 @@ mod tests { } #[test] - fn file_system_non_existing_file() { + fn system_non_existing_file() { let db = TestDb::new(); let test = system_path_to_file(&db, "test.py"); @@ -283,6 +287,21 @@ mod tests { assert_eq!(test, None); } + #[test] + fn system_normalize_paths() { + let db = TestDb::new(); + + assert_eq!( + system_path_to_file(&db, "test.py"), + system_path_to_file(&db, "/test.py") + ); + + assert_eq!( + system_path_to_file(&db, "/root/.././test.py"), + system_path_to_file(&db, "/root/test.py") + ); + } + #[test] fn stubbed_vendored_file() { let mut db = TestDb::new(); diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 92637f5457c5fc..3816dd2723d800 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -51,6 +51,9 @@ pub trait System { .map_or(false, |metadata| metadata.file_type.is_file()) } + /// Returns the current working directory + fn current_directory(&self) -> &SystemPath; + fn as_any(&self) -> &dyn std::any::Any; } diff --git a/crates/ruff_db/src/system/memory_fs.rs b/crates/ruff_db/src/system/memory_fs.rs index 286af8f8e22e40..1ee05806b121e5 100644 --- a/crates/ruff_db/src/system/memory_fs.rs +++ b/crates/ruff_db/src/system/memory_fs.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, RwLock, RwLockWriteGuard}; use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; -use crate::system::{FileType, Metadata, Result, SystemPath}; +use crate::system::{FileType, Metadata, Result, SystemPath, SystemPathBuf}; /// File system that stores all content in memory. /// @@ -32,7 +32,7 @@ impl MemoryFileSystem { /// Creates a new, empty in memory file system with the given current working directory. pub fn with_cwd(cwd: impl AsRef) -> Self { - let cwd = Utf8PathBuf::from(cwd.as_ref().as_str()); + let cwd = cwd.as_ref().to_path_buf(); assert!( cwd.starts_with("/"), @@ -46,11 +46,15 @@ impl MemoryFileSystem { }), }; - fs.create_directory_all(SystemPath::new(&cwd)).unwrap(); + fs.create_directory_all(&cwd).unwrap(); fs } + pub fn cwd(&self) -> &SystemPath { + &self.inner.cwd + } + #[must_use] pub fn snapshot(&self) -> Self { Self { @@ -59,9 +63,9 @@ impl MemoryFileSystem { } pub fn metadata(&self, path: impl AsRef) -> Result { - fn metadata(fs: &MemoryFileSystemInner, path: &SystemPath) -> Result { - let by_path = fs.by_path.read().unwrap(); - let normalized = normalize_path(path, &fs.cwd); + fn metadata(fs: &MemoryFileSystem, path: &SystemPath) -> Result { + let by_path = fs.inner.by_path.read().unwrap(); + let normalized = fs.normalize_path(path); let entry = by_path.get(&normalized).ok_or_else(not_found)?; @@ -81,27 +85,27 @@ impl MemoryFileSystem { Ok(metadata) } - metadata(&self.inner, path.as_ref()) + metadata(self, path.as_ref()) } pub fn is_file(&self, path: impl AsRef) -> bool { let by_path = self.inner.by_path.read().unwrap(); - let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + let normalized = self.normalize_path(path.as_ref()); matches!(by_path.get(&normalized), Some(Entry::File(_))) } pub fn is_directory(&self, path: impl AsRef) -> bool { let by_path = self.inner.by_path.read().unwrap(); - let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + let normalized = self.normalize_path(path.as_ref()); matches!(by_path.get(&normalized), Some(Entry::Directory(_))) } pub fn read_to_string(&self, path: impl AsRef) -> Result { - fn read_to_string(fs: &MemoryFileSystemInner, path: &SystemPath) -> Result { - let by_path = fs.by_path.read().unwrap(); - let normalized = normalize_path(path, &fs.cwd); + fn read_to_string(fs: &MemoryFileSystem, path: &SystemPath) -> Result { + let by_path = fs.inner.by_path.read().unwrap(); + let normalized = fs.normalize_path(path); let entry = by_path.get(&normalized).ok_or_else(not_found)?; @@ -111,12 +115,12 @@ impl MemoryFileSystem { } } - read_to_string(&self.inner, path.as_ref()) + read_to_string(self, path.as_ref()) } pub fn exists(&self, path: &SystemPath) -> bool { let by_path = self.inner.by_path.read().unwrap(); - let normalized = normalize_path(path, &self.inner.cwd); + let normalized = self.normalize_path(path); by_path.contains_key(&normalized) } @@ -146,7 +150,7 @@ impl MemoryFileSystem { pub fn write_file(&self, path: impl AsRef, content: impl ToString) -> Result<()> { let mut by_path = self.inner.by_path.write().unwrap(); - let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + let normalized = self.normalize_path(path.as_ref()); get_or_create_file(&mut by_path, &normalized)?.content = content.to_string(); @@ -156,7 +160,7 @@ impl MemoryFileSystem { pub fn remove_file(&self, path: impl AsRef) -> Result<()> { fn remove_file(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> { let mut by_path = fs.inner.by_path.write().unwrap(); - let normalized = normalize_path(path, &fs.inner.cwd); + let normalized = fs.normalize_path(path); match by_path.entry(normalized) { std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { @@ -178,7 +182,7 @@ impl MemoryFileSystem { /// Creates a new file if the file at `path` doesn't exist. pub fn touch(&self, path: impl AsRef) -> Result<()> { let mut by_path = self.inner.by_path.write().unwrap(); - let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + let normalized = self.normalize_path(path.as_ref()); get_or_create_file(&mut by_path, &normalized)?.last_modified = FileTime::now(); @@ -188,7 +192,7 @@ impl MemoryFileSystem { /// Creates a directory at `path`. All enclosing directories are created if they don't exist. pub fn create_directory_all(&self, path: impl AsRef) -> Result<()> { let mut by_path = self.inner.by_path.write().unwrap(); - let normalized = normalize_path(path.as_ref(), &self.inner.cwd); + let normalized = self.normalize_path(path.as_ref()); create_dir_all(&mut by_path, &normalized) } @@ -202,7 +206,7 @@ impl MemoryFileSystem { pub fn remove_directory(&self, path: impl AsRef) -> Result<()> { fn remove_directory(fs: &MemoryFileSystem, path: &SystemPath) -> Result<()> { let mut by_path = fs.inner.by_path.write().unwrap(); - let normalized = normalize_path(path, &fs.inner.cwd); + let normalized = fs.normalize_path(path); // Test if the directory is empty // Skip the directory path itself @@ -228,6 +232,11 @@ impl MemoryFileSystem { remove_directory(self, path.as_ref()) } + + fn normalize_path(&self, path: impl AsRef) -> Utf8PathBuf { + let normalized = SystemPath::absolute(path, &self.inner.cwd); + normalized.into_utf8_path_buf() + } } impl Default for MemoryFileSystem { @@ -246,7 +255,7 @@ impl std::fmt::Debug for MemoryFileSystem { struct MemoryFileSystemInner { by_path: RwLock>, - cwd: Utf8PathBuf, + cwd: SystemPathBuf, } #[derive(Debug)] @@ -292,42 +301,6 @@ fn directory_not_empty() -> std::io::Error { std::io::Error::new(std::io::ErrorKind::Other, "directory not empty") } -/// Normalizes the path by removing `.` and `..` components and transform the path into an absolute path. -/// -/// Adapted from https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61 -fn normalize_path(path: &SystemPath, cwd: &Utf8Path) -> Utf8PathBuf { - let path = camino::Utf8Path::new(path.as_str()); - - let mut components = path.components().peekable(); - let mut ret = - if let Some(c @ (camino::Utf8Component::Prefix(..) | camino::Utf8Component::RootDir)) = - components.peek().cloned() - { - components.next(); - Utf8PathBuf::from(c.as_str()) - } else { - cwd.to_path_buf() - }; - - for component in components { - match component { - camino::Utf8Component::Prefix(..) => unreachable!(), - camino::Utf8Component::RootDir => { - ret.push(component); - } - camino::Utf8Component::CurDir => {} - camino::Utf8Component::ParentDir => { - ret.pop(); - } - camino::Utf8Component::Normal(c) => { - ret.push(c); - } - } - } - - ret -} - fn create_dir_all( paths: &mut RwLockWriteGuard>, normalized: &Utf8Path, diff --git a/crates/ruff_db/src/system/os.rs b/crates/ruff_db/src/system/os.rs index 79c27c27ecd000..40165c97c88369 100644 --- a/crates/ruff_db/src/system/os.rs +++ b/crates/ruff_db/src/system/os.rs @@ -1,12 +1,27 @@ +use crate::system::{FileType, Metadata, Result, System, SystemPath, SystemPathBuf}; use filetime::FileTime; use std::any::Any; +use std::sync::Arc; -use crate::system::{FileType, Metadata, Result, System, SystemPath}; +#[derive(Default, Debug)] +pub struct OsSystem { + inner: Arc, +} #[derive(Default, Debug)] -pub struct OsSystem; +struct OsSystemInner { + cwd: SystemPathBuf, +} impl OsSystem { + pub fn new(cwd: impl AsRef) -> Self { + Self { + inner: Arc::new(OsSystemInner { + cwd: cwd.as_ref().to_path_buf(), + }), + } + } + #[cfg(unix)] fn permissions(metadata: &std::fs::Metadata) -> Option { use std::os::unix::fs::PermissionsExt; @@ -20,7 +35,9 @@ impl OsSystem { } pub fn snapshot(&self) -> Self { - Self + Self { + inner: self.inner.clone(), + } } } @@ -44,6 +61,10 @@ impl System for OsSystem { path.as_std_path().exists() } + fn current_directory(&self) -> &SystemPath { + &self.inner.cwd + } + fn as_any(&self) -> &dyn Any { self } diff --git a/crates/ruff_db/src/system/path.rs b/crates/ruff_db/src/system/path.rs index 8e370f75551eb6..7efb37c05152b5 100644 --- a/crates/ruff_db/src/system/path.rs +++ b/crates/ruff_db/src/system/path.rs @@ -306,6 +306,86 @@ impl SystemPath { pub fn from_std_path(path: &Path) -> Option<&SystemPath> { Some(SystemPath::new(Utf8Path::from_path(path)?)) } + + /// Makes a path absolute and normalizes it without accessing the file system. + /// + /// Adapted from [cargo](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61) + /// + /// # Examples + /// + /// ## Posix paths + /// + /// ``` + /// # #[cfg(unix)] + /// # fn main() { + /// use ruff_db::system::{SystemPath, SystemPathBuf}; + /// + /// // Relative to absolute + /// let absolute = SystemPath::absolute("foo/./bar", "/tmp"); + /// assert_eq!(absolute, SystemPathBuf::from("/tmp/foo/bar")); + /// + /// // Absolute to absolute + /// let absolute = SystemPath::absolute("/foo//test/.././bar.rs", "/tmp"); + /// assert_eq!(absolute, SystemPathBuf::from("/foo/bar.rs")); + /// # } + /// # #[cfg(not(unix))] + /// # fn main() {} + /// ``` + /// + /// ## Windows paths + /// + /// ``` + /// # #[cfg(windows)] + /// # fn main() { + /// use ruff_db::system::{SystemPath, SystemPathBuf}; + /// + /// // Relative to absolute + /// let absolute = SystemPath::absolute(r"foo\.\bar", r"C:\tmp"); + /// assert_eq!(absolute, SystemPathBuf::from(r"C:\tmp\foo\bar")); + /// + /// // Absolute to absolute + /// let absolute = SystemPath::absolute(r"C:\foo//test\..\./bar.rs", r"C:\tmp"); + /// assert_eq!(absolute, SystemPathBuf::from(r"C:\foo\bar.rs")); + /// # } + /// # #[cfg(not(windows))] + /// # fn main() {} + /// ``` + pub fn absolute(path: impl AsRef, cwd: impl AsRef) -> SystemPathBuf { + fn absolute(path: &SystemPath, cwd: &SystemPath) -> SystemPathBuf { + let path = &path.0; + + let mut components = path.components().peekable(); + let mut ret = if let Some( + c @ (camino::Utf8Component::Prefix(..) | camino::Utf8Component::RootDir), + ) = components.peek().cloned() + { + components.next(); + Utf8PathBuf::from(c.as_str()) + } else { + cwd.0.to_path_buf() + }; + + for component in components { + match component { + camino::Utf8Component::Prefix(..) => unreachable!(), + camino::Utf8Component::RootDir => { + ret.push(component); + } + camino::Utf8Component::CurDir => {} + camino::Utf8Component::ParentDir => { + ret.pop(); + } + camino::Utf8Component::Normal(c) => { + ret.push(c); + } + } + } + + SystemPathBuf::from_utf8_path_buf(ret) + } + + absolute(path.as_ref(), cwd.as_ref()) + } } /// An owned, mutable path on [`System`](`super::System`) (akin to [`String`]). @@ -366,6 +446,10 @@ impl SystemPathBuf { self.0.push(&path.as_ref().0); } + pub fn into_utf8_path_buf(self) -> Utf8PathBuf { + self.0 + } + #[inline] pub fn as_path(&self) -> &SystemPath { SystemPath::new(&self.0) diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index 27b872e595f6b2..635018717df14d 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -34,8 +34,8 @@ impl TestSystem { } } - fn use_os_system(&mut self) { - self.inner = TestFileSystem::Os(OsSystem); + fn use_os_system(&mut self, os: OsSystem) { + self.inner = TestFileSystem::Os(os); } } @@ -75,6 +75,13 @@ impl System for TestSystem { } } + fn current_directory(&self) -> &SystemPath { + match &self.inner { + TestFileSystem::Stub(fs) => fs.cwd(), + TestFileSystem::Os(fs) => fs.current_directory(), + } + } + fn as_any(&self) -> &dyn Any { self } @@ -132,8 +139,8 @@ pub trait DbWithTestSystem: Db + Sized { /// This useful for testing advanced file system features like permissions, symlinks, etc. /// /// Note that any files written to the memory file system won't be copied over. - fn use_os_system(&mut self) { - self.test_system_mut().use_os_system(); + fn use_os_system(&mut self, os: OsSystem) { + self.test_system_mut().use_os_system(os); } /// Returns the memory file system.