From 529aae6fc368818bf64d21aab217826a6c0745fc Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 18 Nov 2024 18:58:56 +0100 Subject: [PATCH 1/3] wasi/fs: Improve stopping condition for ::next When upgrading [Zed](https://github.com/zed-industries/zed/pull/19349) to Rust 1.82 I've encountered a test failure in our test suite. Specifically, one of our extension tests started hanging. I've tracked it down to a call to std::fs::remove_dir_all not returning when an extension is compiled with Rust 1.82 Our extension system uses WASM components, thus I've looked at the diff between 1.81 and 1.82 with respect to WASI and found 736f773844e7ebf05ccb827c17b7ad9eb28aa295 As it turned out, calling remove_dir_all from extension returned io::ErrorKind::NotFound in 1.81; the underlying issue is that the ReadDir iterator never actually terminates iteration, however since it loops around, with 1.81 we'd come across an entry second time and fail to remove it, since it would've been removed previously. With 1.82 and 736f773844e7ebf05ccb827c17b7ad9eb28aa295 it is no longer the case, thus we're seeing the hang. This commit makes ReadDir::next adhere to readdir contract, namely it will no longer call readdir once the returned # of bytes is smaller than the size of a passed-in buffer. Previously we'd only terminate the loop if readdir returned 0. --- library/std/src/sys/pal/wasi/fs.rs | 31 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/library/std/src/sys/pal/wasi/fs.rs b/library/std/src/sys/pal/wasi/fs.rs index 3296c762cca2b..b519a0fe6a3a4 100644 --- a/library/std/src/sys/pal/wasi/fs.rs +++ b/library/std/src/sys/pal/wasi/fs.rs @@ -172,20 +172,22 @@ impl Iterator for ReadDir { let offset = if self.offset == self.cap { let cookie = self.cookie.take()?; match self.inner.dir.fd.readdir(&mut self.buf, cookie) { - Ok(bytes) => self.cap = bytes, + Ok(bytes) => { + // No more entries if we read less than buffer size + if bytes < self.buf.len() { + self.cookie = None; + if bytes == 0 { + return None; + } + } else { + self.cookie = Some(cookie); + } + self.cap = self.buf.len(); + self.offset = 0; + 0 + } Err(e) => return Some(Err(e)), } - self.offset = 0; - self.cookie = Some(cookie); - - // If we didn't actually read anything, this is in theory the - // end of the directory. - if self.cap == 0 { - self.cookie = None; - return None; - } - - 0 } else { self.offset }; @@ -197,7 +199,6 @@ impl Iterator for ReadDir { // where we last left off. let dirent_size = mem::size_of::(); if data.len() < dirent_size { - assert!(self.cookie.is_some()); assert!(self.buf.len() >= dirent_size); self.offset = self.cap; continue; @@ -218,7 +219,9 @@ impl Iterator for ReadDir { self.offset = self.cap; continue; } - self.cookie = Some(dirent.d_next); + self.cookie.as_mut().map(|cookie| { + *cookie = dirent.d_next; + }); self.offset = offset + dirent_size + dirent.d_namlen as usize; let name = &data[..(dirent.d_namlen as usize)]; From 692c19ae8032f80fc75912b98651bfc841cfbaf0 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 26 Nov 2024 13:45:17 +0100 Subject: [PATCH 2/3] Refactor ReadDir into a state machine --- library/std/src/sys/pal/wasi/fs.rs | 171 +++++++++++++++++------------ 1 file changed, 101 insertions(+), 70 deletions(-) diff --git a/library/std/src/sys/pal/wasi/fs.rs b/library/std/src/sys/pal/wasi/fs.rs index b519a0fe6a3a4..5c6ddfa559eca 100644 --- a/library/std/src/sys/pal/wasi/fs.rs +++ b/library/std/src/sys/pal/wasi/fs.rs @@ -27,10 +27,26 @@ pub struct FileAttr { pub struct ReadDir { inner: Arc, - cookie: Option, - buf: Vec, - offset: usize, - cap: usize, + state: ReadDirState, +} + +enum ReadDirState { + /// Next DirEntry should be read from contents of buf at `offset` + FillBuffer { + next_read_offset: wasi::Dircookie, + buf: Vec, + }, + ProcessEntry { + buf: Vec, + next_read_offset: Option, + offset: usize, + }, + /// Do not fetch any more entries, process all entries + RunUntilExhaustion { + buf: Vec, + offset: usize, + }, + Done, } struct ReadDirInner { @@ -147,11 +163,8 @@ impl FileType { impl ReadDir { fn new(dir: File, root: PathBuf) -> ReadDir { ReadDir { - cookie: Some(0), - buf: vec![0; 128], - offset: 0, - cap: 0, inner: Arc::new(ReadDirInner { dir, root }), + state: ReadDirState::FillBuffer { next_read_offset: 0, buf: vec![0; 128] }, } } } @@ -162,81 +175,99 @@ impl fmt::Debug for ReadDir { } } +impl core::iter::FusedIterator for ReadDir {} + impl Iterator for ReadDir { type Item = io::Result; fn next(&mut self) -> Option> { - loop { - // If we've reached the capacity of our buffer then we need to read - // some more from the OS, otherwise we pick up at our old offset. - let offset = if self.offset == self.cap { - let cookie = self.cookie.take()?; - match self.inner.dir.fd.readdir(&mut self.buf, cookie) { - Ok(bytes) => { - // No more entries if we read less than buffer size - if bytes < self.buf.len() { - self.cookie = None; - if bytes == 0 { - return None; - } + match &mut self.state { + ReadDirState::FillBuffer { next_read_offset, ref mut buf } => { + let result = self.inner.dir.fd.readdir(buf, *next_read_offset); + match result { + Ok(read_bytes) => { + if read_bytes < buf.len() { + buf.truncate(read_bytes); + self.state = + ReadDirState::RunUntilExhaustion { buf: mem::take(buf), offset: 0 }; } else { - self.cookie = Some(cookie); + debug_assert_eq!(read_bytes, buf.len()); + self.state = ReadDirState::ProcessEntry { + buf: mem::take(buf), + offset: 0, + next_read_offset: Some(*next_read_offset), + }; } - self.cap = self.buf.len(); - self.offset = 0; - 0 + self.next() + } + Err(e) => { + self.state = ReadDirState::Done; + return Some(Err(e)); } - Err(e) => return Some(Err(e)), - } - } else { - self.offset - }; - let data = &self.buf[offset..self.cap]; - - // If we're not able to read a directory entry then that means it - // must have been truncated at the end of the buffer, so reset our - // offset so we can go back and reread into the buffer, picking up - // where we last left off. - let dirent_size = mem::size_of::(); - if data.len() < dirent_size { - assert!(self.buf.len() >= dirent_size); - self.offset = self.cap; - continue; - } - let (dirent, data) = data.split_at(dirent_size); - let dirent = unsafe { ptr::read_unaligned(dirent.as_ptr() as *const wasi::Dirent) }; - - // If the file name was truncated, then we need to reinvoke - // `readdir` so we truncate our buffer to start over and reread this - // descriptor. Note that if our offset is 0 that means the file name - // is massive and we need a bigger buffer. - if data.len() < dirent.d_namlen as usize { - if offset == 0 { - let amt_to_add = self.buf.capacity(); - self.buf.extend(iter::repeat(0).take(amt_to_add)); } - assert!(self.cookie.is_some()); - self.offset = self.cap; - continue; } - self.cookie.as_mut().map(|cookie| { - *cookie = dirent.d_next; - }); - self.offset = offset + dirent_size + dirent.d_namlen as usize; + ReadDirState::ProcessEntry { ref mut buf, next_read_offset, offset } => { + let contents = &buf[*offset..]; + const DIRENT_SIZE: usize = crate::mem::size_of::(); + if contents.len() >= DIRENT_SIZE { + let (dirent, data) = contents.split_at(DIRENT_SIZE); + let dirent = + unsafe { ptr::read_unaligned(dirent.as_ptr() as *const wasi::Dirent) }; + // If the file name was truncated, then we need to reinvoke + // `readdir` so we truncate our buffer to start over and reread this + // descriptor. + if data.len() < dirent.d_namlen as usize { + if buf.len() < dirent.d_namlen as usize + DIRENT_SIZE { + buf.resize(dirent.d_namlen as usize + DIRENT_SIZE, 0); + } + if let Some(next_read_offset) = *next_read_offset { + self.state = + ReadDirState::FillBuffer { next_read_offset, buf: mem::take(buf) }; + } else { + self.state = ReadDirState::Done; + } + + return self.next(); + } + next_read_offset.as_mut().map(|cookie| { + *cookie = dirent.d_next; + }); + *offset = *offset + DIRENT_SIZE + dirent.d_namlen as usize; + + let name = &data[..(dirent.d_namlen as usize)]; - let name = &data[..(dirent.d_namlen as usize)]; + // These names are skipped on all other platforms, so let's skip + // them here too + if name == b"." || name == b".." { + return self.next(); + } - // These names are skipped on all other platforms, so let's skip - // them here too - if name == b"." || name == b".." { - continue; + return Some(Ok(DirEntry { + meta: dirent, + name: name.to_vec(), + inner: self.inner.clone(), + })); + } else if let Some(next_read_offset) = *next_read_offset { + self.state = ReadDirState::FillBuffer { next_read_offset, buf: mem::take(buf) }; + } else { + self.state = ReadDirState::Done; + } + self.next() } + ReadDirState::RunUntilExhaustion { buf, offset } => { + if *offset >= buf.len() { + self.state = ReadDirState::Done; + } else { + self.state = ReadDirState::ProcessEntry { + buf: mem::take(buf), + offset: *offset, + next_read_offset: None, + }; + } - return Some(Ok(DirEntry { - meta: dirent, - name: name.to_vec(), - inner: self.inner.clone(), - })); + self.next() + } + ReadDirState::Done => None, } } } From f4ab9829e1a3c6e564f79c98f39632aecbc675d3 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 26 Nov 2024 22:46:12 +0100 Subject: [PATCH 3/3] chore: Improve doc comments --- library/std/src/sys/pal/wasi/fs.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/library/std/src/sys/pal/wasi/fs.rs b/library/std/src/sys/pal/wasi/fs.rs index 5c6ddfa559eca..9e233436a8447 100644 --- a/library/std/src/sys/pal/wasi/fs.rs +++ b/library/std/src/sys/pal/wasi/fs.rs @@ -31,7 +31,7 @@ pub struct ReadDir { } enum ReadDirState { - /// Next DirEntry should be read from contents of buf at `offset` + /// Fill `buf` with `buf.len()` bytes starting from `next_read_offset`. FillBuffer { next_read_offset: wasi::Dircookie, buf: Vec, @@ -41,7 +41,8 @@ enum ReadDirState { next_read_offset: Option, offset: usize, }, - /// Do not fetch any more entries, process all entries + /// There is no more data to get in [`Self::FillBuffer`]; keep returning + /// entries via ProcessEntry until `buf` is exhausted. RunUntilExhaustion { buf: Vec, offset: usize,