From 00d07b1109d3430044dff3d74382565652a6ac74 Mon Sep 17 00:00:00 2001 From: Jason Jean Date: Thu, 15 Jun 2023 16:36:33 -0400 Subject: [PATCH] fix(core): dedupe project files in rust (#17618) --- packages/nx/src/native/hasher.rs | 8 +- packages/nx/src/native/parallel_walker.rs | 27 ++---- .../src/native/tests/workspace_files.spec.ts | 48 +++++++++- packages/nx/src/native/utils/mod.rs | 1 + packages/nx/src/native/utils/path.rs | 14 +++ .../src/native/workspace/get_config_files.rs | 96 ++++++++++++++++++- .../workspace/get_nx_workspace_files.rs | 57 ++++++----- 7 files changed, 197 insertions(+), 54 deletions(-) create mode 100644 packages/nx/src/native/utils/path.rs diff --git a/packages/nx/src/native/hasher.rs b/packages/nx/src/native/hasher.rs index 742c2de0999d4..7e5a50d179124 100644 --- a/packages/nx/src/native/hasher.rs +++ b/packages/nx/src/native/hasher.rs @@ -3,6 +3,7 @@ use crate::native::parallel_walker::nx_walker; use crate::native::types::FileData; use crate::native::utils::glob::build_glob_set; +use crate::native::utils::path::Normalize; use anyhow::anyhow; use crossbeam_channel::unbounded; use globset::{Glob, GlobSetBuilder}; @@ -39,7 +40,10 @@ fn hash_files(workspace_root: String) -> HashMap { nx_walker(workspace_root, |rec| { let mut collection: HashMap = HashMap::new(); for (path, content) in rec { - collection.insert(path, xxh3::xxh3_64(&content).to_string()); + collection.insert( + path.to_normalized_string(), + xxh3::xxh3_64(&content).to_string(), + ); } collection }) @@ -57,7 +61,7 @@ fn hash_files_matching_globs( for (path, content) in receiver { if glob_set.is_match(&path) { collection.push(FileData { - file: path, + file: path.to_normalized_string(), hash: xxh3::xxh3_64(&content).to_string(), }); } diff --git a/packages/nx/src/native/parallel_walker.rs b/packages/nx/src/native/parallel_walker.rs index 499f4b10fc719..6833837b025a9 100644 --- a/packages/nx/src/native/parallel_walker.rs +++ b/packages/nx/src/native/parallel_walker.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use std::thread; use std::thread::available_parallelism; @@ -8,7 +8,7 @@ use ignore::WalkBuilder; pub fn nx_walker(directory: P, f: Fn) -> Re where P: AsRef, - Fn: FnOnce(Receiver<(String, Vec)>) -> Re + Send + 'static, + Fn: FnOnce(Receiver<(PathBuf, Vec)>) -> Re + Send + 'static, Re: Send + 'static, { let directory = directory.as_ref(); @@ -27,7 +27,7 @@ where let cpus = available_parallelism().map_or(2, |n| n.get()) - 1; - let (sender, receiver) = unbounded::<(String, Vec)>(); + let (sender, receiver) = unbounded::<(PathBuf, Vec)>(); let receiver_thread = thread::spawn(|| f(receiver)); @@ -49,15 +49,7 @@ where return Continue; }; - let Some(file_path) = file_path.to_str() else { - return Continue; - }; - - // convert back-slashes in Windows paths, since the js expects only forward-slash path separators - #[cfg(target_os = "windows")] - let file_path = file_path.replace('\\', "/"); - - tx.send((file_path.to_string(), content)).ok(); + tx.send((file_path.into(), content)).ok(); Continue }) @@ -70,6 +62,7 @@ where #[cfg(test)] mod test { use super::*; + use crate::native::utils::path::Normalize; use assert_fs::prelude::*; use assert_fs::TempDir; use std::collections::HashMap; @@ -117,10 +110,10 @@ mod test { assert_eq!( content, HashMap::from([ - ("baz/qux.txt".into(), "content@qux".into()), - ("foo.txt".into(), "content1".into()), - ("test.txt".into(), "content".into()), - ("bar.txt".into(), "content2".into()), + (PathBuf::from("baz/qux.txt"), "content@qux".into()), + (PathBuf::from("foo.txt"), "content1".into()), + (PathBuf::from("test.txt"), "content".into()), + (PathBuf::from("bar.txt"), "content2".into()), ]) ); } @@ -155,7 +148,7 @@ nested/child-two/ let mut file_names = nx_walker(temp_dir, |rec| { let mut file_names = vec![]; for (path, _) in rec { - file_names.push(path); + file_names.push(path.to_normalized_string()); } file_names }); diff --git a/packages/nx/src/native/tests/workspace_files.spec.ts b/packages/nx/src/native/tests/workspace_files.spec.ts index 1c966bae53f8e..de31dd3dc697c 100644 --- a/packages/nx/src/native/tests/workspace_files.spec.ts +++ b/packages/nx/src/native/tests/workspace_files.spec.ts @@ -1,4 +1,8 @@ -import { getWorkspaceFilesNative, WorkspaceErrors } from '../index'; +import { + getConfigFiles, + getWorkspaceFilesNative, + WorkspaceErrors, +} from '../index'; import { TempFs } from '../../utils/testing/temp-fs'; import { NxJsonConfiguration } from '../../config/nx-json'; @@ -124,6 +128,48 @@ describe('workspace files', () => { ] `); }); + it('should dedupe configuration files', async () => { + const fs = new TempFs('workspace-files'); + const nxJson: NxJsonConfiguration = {}; + await fs.createFiles({ + './nx.json': JSON.stringify(nxJson), + './package.json': JSON.stringify({ + name: 'repo-name', + version: '0.0.0', + dependencies: {}, + }), + './project.json': JSON.stringify({ + name: 'repo-name', + }), + './libs/project1/project.json': JSON.stringify({ + name: 'project1', + }), + './libs/project1/package.json': JSON.stringify({ + name: 'project1', + }), + './libs/project1/index.js': '', + }); + + let globs = ['project.json', '**/project.json', '**/package.json']; + let { configFiles } = getWorkspaceFilesNative(fs.tempDir, globs); + + configFiles = configFiles.sort(); + + expect(configFiles).toMatchInlineSnapshot(` + [ + "libs/project1/project.json", + "project.json", + ] + `); + + let configFiles2 = getConfigFiles(fs.tempDir, globs).sort(); + expect(configFiles2).toMatchInlineSnapshot(` + [ + "libs/project1/project.json", + "project.json", + ] + `); + }); describe('errors', () => { it('it should infer names of configuration files without a name', async () => { diff --git a/packages/nx/src/native/utils/mod.rs b/packages/nx/src/native/utils/mod.rs index 9ee2e9707b8e1..2a4a60e8289b4 100644 --- a/packages/nx/src/native/utils/mod.rs +++ b/packages/nx/src/native/utils/mod.rs @@ -1 +1,2 @@ pub mod glob; +pub mod path; diff --git a/packages/nx/src/native/utils/path.rs b/packages/nx/src/native/utils/path.rs new file mode 100644 index 0000000000000..1591bdbccb1c9 --- /dev/null +++ b/packages/nx/src/native/utils/path.rs @@ -0,0 +1,14 @@ +pub trait Normalize { + fn to_normalized_string(&self) -> String; +} + +impl Normalize for std::path::Path { + fn to_normalized_string(&self) -> String { + // convert back-slashes in Windows paths, since the js expects only forward-slash path separators + if cfg!(windows) { + self.display().to_string().replace('\\', "/") + } else { + self.display().to_string() + } + } +} diff --git a/packages/nx/src/native/workspace/get_config_files.rs b/packages/nx/src/native/workspace/get_config_files.rs index 701e23c6013f3..880b3488363c2 100644 --- a/packages/nx/src/native/workspace/get_config_files.rs +++ b/packages/nx/src/native/workspace/get_config_files.rs @@ -1,17 +1,103 @@ use crate::native::parallel_walker::nx_walker; use crate::native::utils::glob::build_glob_set; +use crate::native::utils::path::Normalize; +use globset::GlobSet; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; #[napi] /// Get workspace config files based on provided globs pub fn get_config_files(workspace_root: String, globs: Vec) -> anyhow::Result> { let globs = build_glob_set(globs)?; Ok(nx_walker(workspace_root, move |rec| { - let mut config_paths: Vec = vec![]; - for (path, _) in rec { - if globs.is_match(&path) { - config_paths.push(path.to_owned()); - } + let mut config_paths: HashMap)> = HashMap::new(); + for (path, content) in rec { + insert_config_file_into_map((path, content), &mut config_paths, &globs); } config_paths + .into_iter() + .map(|(_, (val, _))| val.to_normalized_string()) + .collect() })) } + +pub fn insert_config_file_into_map( + (path, content): (PathBuf, Vec), + config_paths: &mut HashMap)>, + globs: &GlobSet, +) { + if globs.is_match(&path) { + let parent = path.parent().unwrap_or_else(|| Path::new("")).to_path_buf(); + + let file_name = path + .file_name() + .expect("Config paths always have file names"); + if file_name == "project.json" { + config_paths.insert(parent, (path, content)); + } else if file_name == "package.json" { + match config_paths.entry(parent) { + Entry::Occupied(mut o) => { + if o.get() + .0 + .file_name() + .expect("Config paths always have file names") + != "project.json" + { + o.insert((path, content)); + } + } + Entry::Vacant(v) => { + v.insert((path, content)); + } + } + } else { + config_paths.entry(parent).or_insert((path, content)); + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::collections::HashMap; + use std::path::PathBuf; + + #[test] + fn should_insert_config_files_properly() { + let mut config_paths: HashMap)> = HashMap::new(); + let globs = build_glob_set(vec!["**/*".into()]).unwrap(); + + insert_config_file_into_map( + (PathBuf::from("project.json"), vec![]), + &mut config_paths, + &globs, + ); + insert_config_file_into_map( + (PathBuf::from("package.json"), vec![]), + &mut config_paths, + &globs, + ); + insert_config_file_into_map( + (PathBuf::from("lib1/project.json"), vec![]), + &mut config_paths, + &globs, + ); + insert_config_file_into_map( + (PathBuf::from("lib2/package.json"), vec![]), + &mut config_paths, + &globs, + ); + + let config_files: Vec = config_paths + .into_iter() + .map(|(_, (path, _))| path) + .collect(); + + assert!(config_files.contains(&PathBuf::from("project.json"))); + assert!(config_files.contains(&PathBuf::from("lib1/project.json"))); + assert!(config_files.contains(&PathBuf::from("lib2/package.json"))); + + assert!(!config_files.contains(&PathBuf::from("package.json"))); + } +} diff --git a/packages/nx/src/native/workspace/get_nx_workspace_files.rs b/packages/nx/src/native/workspace/get_nx_workspace_files.rs index 7ae2d3c7d5b57..e903474b27793 100644 --- a/packages/nx/src/native/workspace/get_nx_workspace_files.rs +++ b/packages/nx/src/native/workspace/get_nx_workspace_files.rs @@ -1,6 +1,5 @@ use jsonc_parser::ParseOptions; use std::collections::HashMap; -use std::ffi::OsStr; use std::path::{Path, PathBuf}; use rayon::prelude::*; @@ -11,7 +10,9 @@ use crate::native::logger::enable_logger; use crate::native::parallel_walker::nx_walker; use crate::native::types::FileData; use crate::native::utils::glob::build_glob_set; +use crate::native::utils::path::Normalize; use crate::native::workspace::errors::{InternalWorkspaceErrors, WorkspaceErrors}; +use crate::native::workspace::get_config_files::insert_config_file_into_map; use crate::native::workspace::types::{FileLocation, ProjectConfiguration}; #[napi(object)] @@ -89,17 +90,19 @@ pub fn get_workspace_files_native( Ok(NxWorkspaceFiles { project_file_map, global_files, - config_files: projects.iter().map(|(path, _)| path.clone()).collect(), + config_files: projects + .keys() + .map(|path| path.to_normalized_string()) + .collect(), }) } fn create_root_map( - projects: &Vec<(String, Vec)>, + projects: &HashMap>, ) -> Result, InternalWorkspaceErrors> { projects .par_iter() .map(|(path, content)| { - let path = Path::new(path); let file_name = path .file_name() .expect("path should always have a filename"); @@ -128,31 +131,29 @@ fn create_root_map( }) }?; Ok((parent_path, name)) + } else if let Some(parent_path) = path.parent() { + Ok(( + parent_path, + parent_path + .file_name() + .unwrap_or_default() + .to_os_string() + .into_string() + .map_err(|os_string| InternalWorkspaceErrors::Generic { + msg: format!("Cannot turn {os_string:?} into String"), + })?, + )) } else { - if let Some(parent_path) = path.parent() { - Ok(( - parent_path, - parent_path - .file_name() - .unwrap_or_default() - .to_os_string() - .into_string() - .map_err(|os_string| InternalWorkspaceErrors::Generic { - msg: format!("Cannot turn {os_string:?} into String"), - })?, - )) - } else { - Err(InternalWorkspaceErrors::Generic { - msg: format!("{path:?} has no parent"), - }) - } + Err(InternalWorkspaceErrors::Generic { + msg: format!("{path:?} has no parent"), + }) }; }) .collect() } fn read_project_configuration( - content: &Vec, + content: &[u8], path: &Path, ) -> Result { serde_json::from_slice(content).or_else(|_| { @@ -169,22 +170,20 @@ fn read_project_configuration( }) } -type WorkspaceData = (Vec<(String, Vec)>, Vec); +type WorkspaceData = (HashMap>, Vec); fn get_file_data(workspace_root: &str, globs: Vec) -> anyhow::Result { let globs = build_glob_set(globs)?; let (projects, file_data) = nx_walker(workspace_root, move |rec| { - let mut projects: Vec<(String, Vec)> = vec![]; + let mut projects: HashMap)> = HashMap::new(); let mut file_hashes: Vec = vec![]; for (path, content) in rec { file_hashes.push(FileData { - file: path.clone(), + file: path.to_normalized_string(), hash: xxh3::xxh3_64(&content).to_string(), }); - if globs.is_match(&path) { - projects.push((path, content)); - } + insert_config_file_into_map((path, content), &mut projects, &globs) } (projects, file_hashes) }); - Ok((projects, file_data)) + Ok((projects.into_values().collect(), file_data)) }