From b658f9393d9e1e769258ef6fbcd780f261833745 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 9 May 2024 19:19:36 -0400 Subject: [PATCH 1/8] fix(npm): extract npm tarball into temp dir then rename to destination --- cli/npm/managed/cache.rs | 26 +++++++++++++++++--------- cli/npm/managed/tarball.rs | 18 ++++++++++++++---- cli/util/fs.rs | 12 ++---------- cli/util/path.rs | 26 ++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 23 deletions(-) diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs index 9ba5c1c9963082..fe458d664877ac 100644 --- a/cli/npm/managed/cache.rs +++ b/cli/npm/managed/cache.rs @@ -69,7 +69,7 @@ impl NpmCache { /// to ensure a package is only downloaded once per run of the CLI. This /// prevents downloads from re-occurring when someone has `--reload` and /// and imports a dynamic import that imports the same package again for example. - fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool { + fn should_use_cache_for_package(&self, package: &PackageNv) -> bool { self.cache_setting.should_use_for_npm_package(&package.name) || !self .previously_reloaded_packages @@ -98,12 +98,8 @@ impl NpmCache { let package_folder = self .cache_dir .package_folder_for_name_and_version(package, registry_url); - if self.should_use_global_cache_for_package(package) - && self.fs.exists_sync(&package_folder) - // if this file exists, then the package didn't successfully extract - // the first time, or another process is currently extracting the zip file - && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME)) - { + let should_use_cache = self.should_use_cache_for_package(package); + if should_use_cache && self.fs.exists_sync(&package_folder) { return Ok(()); } else if self.cache_setting == CacheSetting::Only { return Err(custom_error( @@ -127,6 +123,15 @@ impl NpmCache { .await?; match maybe_bytes { Some(bytes) => { + // the user ran with `--reload`, so remove the existing directory + if !should_use_cache && self.fs.exists_sync(&package_folder) { + self + .fs + .remove_sync(&package_folder, true) + .map_err(AnyError::from) + .context("Failed removing package directory.")?; + } + verify_and_extract_tarball(package, &bytes, dist, &package_folder) } None => { @@ -150,7 +155,7 @@ impl NpmCache { .package_folder_for_id(folder_id, registry_url); if package_folder.exists() - // if this file exists, then the package didn't successfully extract + // if this file exists, then the package didn't successfully initialize // the first time, or another process is currently extracting the zip file && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists() && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name) @@ -161,6 +166,9 @@ impl NpmCache { let original_package_folder = self .cache_dir .package_folder_for_name_and_version(&folder_id.nv, registry_url); + + // it seems Windows does an "AccessDenied" error when moving a + // directory with hard links, so that's why this solution is done with_folder_sync_lock(&folder_id.nv, &package_folder, || { hard_link_dir_recursive(&original_package_folder, &package_folder) })?; @@ -206,7 +214,7 @@ impl NpmCache { const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; -pub fn with_folder_sync_lock( +fn with_folder_sync_lock( package: &PackageNv, output_folder: &Path, action: impl FnOnce() -> Result<(), AnyError>, diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index 1267b13d8cbddb..12b785ca9a5869 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -16,7 +16,7 @@ use flate2::read::GzDecoder; use tar::Archive; use tar::EntryType; -use super::cache::with_folder_sync_lock; +use crate::util::path::get_atomic_dir_path; pub fn verify_and_extract_tarball( package: &PackageNv, @@ -26,9 +26,19 @@ pub fn verify_and_extract_tarball( ) -> Result<(), AnyError> { verify_tarball_integrity(package, data, &dist_info.integrity())?; - with_folder_sync_lock(package, output_folder, || { - extract_tarball(data, output_folder) - }) + let temp_dir = get_atomic_dir_path(output_folder); + extract_tarball(data, &temp_dir)?; + match std::fs::rename(&temp_dir, output_folder) { + Ok(_) => {} + Err(err) => { + let _ = fs::remove_dir_all(&temp_dir); + if err.kind() != std::io::ErrorKind::AlreadyExists { + bail!("Failed to extract tarball: {:#}", err) + } + } + } + + Ok(()) } fn verify_tarball_integrity( diff --git a/cli/util/fs.rs b/cli/util/fs.rs index 92820ebe87cd14..fdc7855e6230b7 100644 --- a/cli/util/fs.rs +++ b/cli/util/fs.rs @@ -2,7 +2,6 @@ use std::collections::HashSet; use std::env::current_dir; -use std::fmt::Write as FmtWrite; use std::fs::FileType; use std::fs::OpenOptions; use std::io::Error; @@ -23,12 +22,12 @@ use deno_core::error::AnyError; pub use deno_core::normalize_path; use deno_core::unsync::spawn_blocking; use deno_core::ModuleSpecifier; -use deno_runtime::deno_crypto::rand; use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::PathClean; use crate::util::gitignore::DirGitIgnores; use crate::util::gitignore::GitIgnoreTree; +use crate::util::path::get_atomic_file_path; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; use crate::util::progress_bar::ProgressMessagePrompt; @@ -56,14 +55,7 @@ pub fn atomic_write_file>( } fn inner(file_path: &Path, data: &[u8], mode: u32) -> std::io::Result<()> { - let temp_file_path = { - let rand: String = (0..4).fold(String::new(), |mut output, _| { - let _ = write!(output, "{:02x}", rand::random::()); - output - }); - let extension = format!("{rand}.tmp"); - file_path.with_extension(extension) - }; + let temp_file_path = get_atomic_file_path(file_path); if let Err(write_err) = atomic_write_file_raw(&temp_file_path, file_path, data, mode) diff --git a/cli/util/path.rs b/cli/util/path.rs index a3109ad04ad9a5..3c848edea708b0 100644 --- a/cli/util/path.rs +++ b/cli/util/path.rs @@ -42,6 +42,32 @@ pub fn get_extension(file_path: &Path) -> Option { .map(|e| e.to_lowercase()); } +pub fn get_atomic_dir_path(file_path: &Path) -> PathBuf { + let rand = gen_rand_path_component(); + let new_file_name = format!( + ".{}_{}", + file_path + .file_name() + .map(|f| f.to_string_lossy()) + .unwrap_or(Cow::Borrowed("")), + rand + ); + file_path.with_file_name(new_file_name) +} + +pub fn get_atomic_file_path(file_path: &Path) -> PathBuf { + let rand = gen_rand_path_component(); + let extension = format!("{rand}.tmp"); + file_path.with_extension(extension) +} + +fn gen_rand_path_component() -> String { + (0..4).fold(String::new(), |mut output, _| { + output.push_str(&format!("{:02x}", rand::random::())); + output + }) +} + /// TypeScript figures out the type of file based on the extension, but we take /// other factors into account like the file headers. The hack here is to map the /// specifier passed to TypeScript to a new specifier with the file extension. From 34ec8eecd3d945562d018e82e4e1a635e41da13d Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 9 May 2024 19:37:19 -0400 Subject: [PATCH 2/8] Use retries --- cli/npm/managed/cache.rs | 20 ++++++++++++++----- cli/npm/managed/tarball.rs | 41 ++++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs index fe458d664877ac..464dd2291c5c48 100644 --- a/cli/npm/managed/cache.rs +++ b/cli/npm/managed/cache.rs @@ -91,14 +91,14 @@ impl NpmCache { async fn ensure_package_inner( &self, - package: &PackageNv, + package_nv: &PackageNv, dist: &NpmPackageVersionDistInfo, registry_url: &Url, ) -> Result<(), AnyError> { let package_folder = self .cache_dir - .package_folder_for_name_and_version(package, registry_url); - let should_use_cache = self.should_use_cache_for_package(package); + .package_folder_for_name_and_version(package_nv, registry_url); + let should_use_cache = self.should_use_cache_for_package(package_nv); if should_use_cache && self.fs.exists_sync(&package_folder) { return Ok(()); } else if self.cache_setting == CacheSetting::Only { @@ -106,7 +106,7 @@ impl NpmCache { "NotCached", format!( "An npm specifier not found in cache: \"{}\", --cached-only is specified.", - &package.name + &package_nv.name ) ) ); @@ -132,7 +132,17 @@ impl NpmCache { .context("Failed removing package directory.")?; } - verify_and_extract_tarball(package, &bytes, dist, &package_folder) + let dist = dist.clone(); + let package_nv = package_nv.clone(); + deno_core::unsync::spawn_blocking(move || { + verify_and_extract_tarball( + &package_nv, + &bytes, + &dist, + &package_folder, + ) + }) + .await? } None => { bail!("Could not find npm package tarball at: {}", dist.tarball); diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index 12b785ca9a5869..4ec6ea027c7cf9 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -2,12 +2,14 @@ use std::collections::HashSet; use std::fs; +use std::io::ErrorKind; use std::path::Path; use std::path::PathBuf; use base64::prelude::BASE64_STANDARD; use base64::Engine; use deno_core::anyhow::bail; +use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_npm::registry::NpmPackageVersionDistInfo; use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; @@ -19,26 +21,45 @@ use tar::EntryType; use crate::util::path::get_atomic_dir_path; pub fn verify_and_extract_tarball( - package: &PackageNv, + package_nv: &PackageNv, data: &[u8], dist_info: &NpmPackageVersionDistInfo, output_folder: &Path, ) -> Result<(), AnyError> { - verify_tarball_integrity(package, data, &dist_info.integrity())?; + verify_tarball_integrity(package_nv, data, &dist_info.integrity())?; let temp_dir = get_atomic_dir_path(output_folder); extract_tarball(data, &temp_dir)?; - match std::fs::rename(&temp_dir, output_folder) { - Ok(_) => {} - Err(err) => { - let _ = fs::remove_dir_all(&temp_dir); - if err.kind() != std::io::ErrorKind::AlreadyExists { - bail!("Failed to extract tarball: {:#}", err) + rename_with_retries(&temp_dir, output_folder) + .map_err(AnyError::from) + .context("Failed moving extracted tarball to final destination.") +} + +fn rename_with_retries( + temp_dir: &Path, + output_folder: &Path, +) -> Result<(), std::io::Error> { + let mut count = 0; + // renaming might be flaky if a lot of processes are trying + // to do this, so try again a few times + loop { + match fs::rename(temp_dir, output_folder) { + Ok(_) => return Ok(()), + Err(err) if err.kind() == ErrorKind::AlreadyExists => { + // another process copied here, just cleanup + let _ = fs::remove_dir_all(&temp_dir); + } + Err(err) => { + count += 1; + if count >= 3 { + let _ = fs::remove_dir_all(&temp_dir); + return Err(err); + } + // wait a bit before retrying + std::thread::sleep(std::time::Duration::from_millis(10)); } } } - - Ok(()) } fn verify_tarball_integrity( From 69c1457dd645d063ae16f40cdb4806ce4bc3e600 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 9 May 2024 19:46:37 -0400 Subject: [PATCH 3/8] Increase number of retries --- cli/npm/managed/tarball.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index 4ec6ea027c7cf9..86cb0337040d57 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -41,20 +41,22 @@ fn rename_with_retries( ) -> Result<(), std::io::Error> { let mut count = 0; // renaming might be flaky if a lot of processes are trying - // to do this, so try again a few times + // to do this, so retry a few times loop { match fs::rename(temp_dir, output_folder) { Ok(_) => return Ok(()), Err(err) if err.kind() == ErrorKind::AlreadyExists => { // another process copied here, just cleanup - let _ = fs::remove_dir_all(&temp_dir); + let _ = fs::remove_dir_all(temp_dir); } Err(err) => { count += 1; - if count >= 3 { - let _ = fs::remove_dir_all(&temp_dir); + if count >= 5 { + // too many tries, cleanup and return the error + let _ = fs::remove_dir_all(temp_dir); return Err(err); } + // wait a bit before retrying std::thread::sleep(std::time::Duration::from_millis(10)); } From 727c80b6cc9bd25fd0687bd0cacefc04cf30043a Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 9 May 2024 20:39:26 -0400 Subject: [PATCH 4/8] Sleep longer --- cli/npm/managed/tarball.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index 86cb0337040d57..a9038d20429f4c 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -57,8 +57,10 @@ fn rename_with_retries( return Err(err); } - // wait a bit before retrying - std::thread::sleep(std::time::Duration::from_millis(10)); + // wait a bit before retrying... this should be very rare or only + // in error cases, so ok to sleep a bit + let sleep_ms = std::cmp::min(50, 10 * count); + std::thread::sleep(std::time::Duration::from_millis(sleep_ms)); } } } From d275cf7f5b62ea0012123119411c866b77683b07 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 9 May 2024 20:41:38 -0400 Subject: [PATCH 5/8] increase more --- cli/npm/managed/tarball.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index a9038d20429f4c..f63d3616601e23 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -59,7 +59,7 @@ fn rename_with_retries( // wait a bit before retrying... this should be very rare or only // in error cases, so ok to sleep a bit - let sleep_ms = std::cmp::min(50, 10 * count); + let sleep_ms = std::cmp::min(100, 20 * count); std::thread::sleep(std::time::Duration::from_millis(sleep_ms)); } } From add30174dd6165c049e6ca3e3f7fbbc2e05d98ab Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 9 May 2024 21:10:54 -0400 Subject: [PATCH 6/8] Ensure the package dir doesn't get corrupted with killing deno while it's running with --reload --- cli/npm/managed/cache.rs | 19 ++++++++++--------- cli/npm/managed/tarball.rs | 30 ++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs index 464dd2291c5c48..53b449eeb1992e 100644 --- a/cli/npm/managed/cache.rs +++ b/cli/npm/managed/cache.rs @@ -25,6 +25,7 @@ use crate::util::fs::hard_link_dir_recursive; use crate::util::progress_bar::ProgressBar; use super::tarball::verify_and_extract_tarball; +use super::tarball::TarballExtractionMode; /// Stores a single copy of npm packages in a cache. #[derive(Debug)] @@ -123,15 +124,14 @@ impl NpmCache { .await?; match maybe_bytes { Some(bytes) => { - // the user ran with `--reload`, so remove the existing directory - if !should_use_cache && self.fs.exists_sync(&package_folder) { - self - .fs - .remove_sync(&package_folder, true) - .map_err(AnyError::from) - .context("Failed removing package directory.")?; - } - + let extraction_mode = if should_use_cache { + TarballExtractionMode::SiblingTempDir + } else { + // the user ran with `--reload`, so overwrite the package instead of + // deleting it since the package might get corrupted if a user kills + // their deno process while it's deleting a package directory + TarballExtractionMode::Overwrite + }; let dist = dist.clone(); let package_nv = package_nv.clone(); deno_core::unsync::spawn_blocking(move || { @@ -140,6 +140,7 @@ impl NpmCache { &bytes, &dist, &package_folder, + extraction_mode, ) }) .await? diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index f63d3616601e23..5bcac7fa4d6018 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -20,19 +20,37 @@ use tar::EntryType; use crate::util::path::get_atomic_dir_path; +#[derive(Debug, Copy, Clone)] +pub enum TarballExtractionMode { + /// Overwrites the destination directory without deleting any files. + Overwrite, + /// Creates and writes to a sibling temporary directory. When done, moves + /// it to the final destination. + /// + /// This is more robust than `Overwrite` as it better handles multiple + /// processes writing to the directory at the same time. + SiblingTempDir, +} + pub fn verify_and_extract_tarball( package_nv: &PackageNv, data: &[u8], dist_info: &NpmPackageVersionDistInfo, output_folder: &Path, + extraction_mode: TarballExtractionMode, ) -> Result<(), AnyError> { verify_tarball_integrity(package_nv, data, &dist_info.integrity())?; - let temp_dir = get_atomic_dir_path(output_folder); - extract_tarball(data, &temp_dir)?; - rename_with_retries(&temp_dir, output_folder) - .map_err(AnyError::from) - .context("Failed moving extracted tarball to final destination.") + match extraction_mode { + TarballExtractionMode::Overwrite => extract_tarball(data, output_folder), + TarballExtractionMode::SiblingTempDir => { + let temp_dir = get_atomic_dir_path(output_folder); + extract_tarball(data, &temp_dir)?; + rename_with_retries(&temp_dir, output_folder) + .map_err(AnyError::from) + .context("Failed moving extracted tarball to final destination.") + } + } } fn rename_with_retries( @@ -51,7 +69,7 @@ fn rename_with_retries( } Err(err) => { count += 1; - if count >= 5 { + if count > 5 { // too many tries, cleanup and return the error let _ = fs::remove_dir_all(temp_dir); return Err(err); From 5d3d62ca06712eafcceb03863a8e0264bdda0287 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Fri, 10 May 2024 09:49:46 -0400 Subject: [PATCH 7/8] Actually fix. --- cli/npm/managed/tarball.rs | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs index 5bcac7fa4d6018..e2d242e6623e0a 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/tarball.rs @@ -57,20 +57,26 @@ fn rename_with_retries( temp_dir: &Path, output_folder: &Path, ) -> Result<(), std::io::Error> { + fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool { + // Windows will do an "Access is denied" error + err.kind() == ErrorKind::AlreadyExists || output_folder.exists() + } + let mut count = 0; // renaming might be flaky if a lot of processes are trying // to do this, so retry a few times loop { match fs::rename(temp_dir, output_folder) { Ok(_) => return Ok(()), - Err(err) if err.kind() == ErrorKind::AlreadyExists => { + Err(err) if already_exists(&err, output_folder) => { // another process copied here, just cleanup let _ = fs::remove_dir_all(temp_dir); + return Ok(()); } Err(err) => { count += 1; if count > 5 { - // too many tries, cleanup and return the error + // too many retries, cleanup and return the error let _ = fs::remove_dir_all(temp_dir); return Err(err); } @@ -203,6 +209,7 @@ fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { #[cfg(test)] mod test { use deno_semver::Version; + use test_util::TempDir; use super::*; @@ -293,4 +300,25 @@ mod test { ) .is_ok()); } + + #[test] + fn rename_with_retries_succeeds_exists() { + let temp_dir = TempDir::new(); + let folder_1 = temp_dir.path().join("folder_1"); + let folder_2 = temp_dir.path().join("folder_2"); + + folder_1.create_dir_all(); + folder_1.join("a.txt").write("test"); + folder_2.create_dir_all(); + // this will not end up in the output as rename_with_retries assumes + // the folders ending up at the destination are the same + folder_2.join("b.txt").write("test2"); + + let dest_folder = temp_dir.path().join("dest_folder"); + + rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap(); + rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap(); + assert!(dest_folder.join("a.txt").exists()); + assert!(!dest_folder.join("b.txt").exists()); + } } From 77e19212674a3177ed9c5f4b1b599000733aa252 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Tue, 14 May 2024 13:55:22 -0400 Subject: [PATCH 8/8] add comment explaining why we cannot rename and delete --- cli/npm/managed/cache.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs index 53b449eeb1992e..44b98fcee0b0d8 100644 --- a/cli/npm/managed/cache.rs +++ b/cli/npm/managed/cache.rs @@ -100,7 +100,8 @@ impl NpmCache { .cache_dir .package_folder_for_name_and_version(package_nv, registry_url); let should_use_cache = self.should_use_cache_for_package(package_nv); - if should_use_cache && self.fs.exists_sync(&package_folder) { + let package_folder_exists = self.fs.exists_sync(&package_folder); + if should_use_cache && package_folder_exists { return Ok(()); } else if self.cache_setting == CacheSetting::Only { return Err(custom_error( @@ -124,12 +125,17 @@ impl NpmCache { .await?; match maybe_bytes { Some(bytes) => { - let extraction_mode = if should_use_cache { + let extraction_mode = if should_use_cache || !package_folder_exists { TarballExtractionMode::SiblingTempDir } else { - // the user ran with `--reload`, so overwrite the package instead of + // The user ran with `--reload`, so overwrite the package instead of // deleting it since the package might get corrupted if a user kills // their deno process while it's deleting a package directory + // + // We can't rename this folder and delete it because the folder + // may be in use by another process or may now contain hardlinks, + // which will cause windows to throw an "AccessDenied" error when + // renaming. So we settle for overwriting. TarballExtractionMode::Overwrite }; let dist = dist.clone();