Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor tail #3905

Merged
merged 1 commit into from
Sep 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
473 changes: 473 additions & 0 deletions src/uu/tail/src/args.rs

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions src/uu/tail/src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// spell-checker:ignore (ToDO) filehandle BUFSIZ
use std::collections::VecDeque;
use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom, Write};
use std::io::{BufRead, Read, Seek, SeekFrom, Write};
use uucore::error::UResult;

/// When reading files in reverse in `bounded_tail`, this is the size of each
Expand Down Expand Up @@ -208,7 +208,7 @@ impl BytesChunk {
/// that number of bytes. If EOF is reached (so 0 bytes are read), then returns
/// [`UResult<None>`] or else the result with [`Some(bytes)`] where bytes is the number of bytes
/// read from the source.
pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult<Option<usize>> {
let num_bytes = filehandle.read(&mut self.buffer)?;
self.bytes = num_bytes;
if num_bytes == 0 {
Expand Down Expand Up @@ -283,7 +283,7 @@ impl BytesChunkBuffer {
/// let mut chunks = BytesChunkBuffer::new(num_print);
/// chunks.fill(&mut reader).unwrap();
/// ```
pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> {
let mut chunk = Box::new(BytesChunk::new());

// fill chunks with all bytes from reader and reuse already instantiated chunks if possible
Expand Down Expand Up @@ -323,6 +323,10 @@ impl BytesChunkBuffer {
}
Ok(())
}

pub fn has_data(&self) -> bool {
!self.chunks.is_empty()
}
}

/// Works similar to a [`BytesChunk`] but also stores the number of lines encountered in the current
Expand Down Expand Up @@ -452,7 +456,7 @@ impl LinesChunk {
/// that number of bytes. This function works like the [`BytesChunk::fill`] function besides
/// that this function also counts and stores the number of lines encountered while reading from
/// the `filehandle`.
pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
pub fn fill(&mut self, filehandle: &mut impl BufRead) -> UResult<Option<usize>> {
match self.chunk.fill(filehandle)? {
None => {
self.lines = 0;
Expand Down Expand Up @@ -556,7 +560,7 @@ impl LinesChunkBuffer {
/// in sum exactly `self.num_print` lines stored in all chunks. The method returns an iterator
/// over these chunks. If there are no chunks, for example because the piped stdin contained no
/// lines, or `num_print = 0` then `iterator.next` will return None.
pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
pub fn fill(&mut self, reader: &mut impl BufRead) -> UResult<()> {
let mut chunk = Box::new(LinesChunk::new(self.delimiter));

while (chunk.fill(reader)?).is_some() {
Expand Down
211 changes: 211 additions & 0 deletions src/uu/tail/src/follow/files.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.

// spell-checker:ignore tailable seekable stdlib (stdlib)

use crate::args::Settings;
use crate::chunks::BytesChunkBuffer;
use crate::paths::{HeaderPrinter, PathExtTail};
use crate::text;
use std::collections::hash_map::Keys;
use std::collections::HashMap;
Comment on lines +12 to +13
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
use std::collections::hash_map::Keys;
use std::collections::HashMap;
use std::collections::{hash_map::Keys, HashMap};

use std::fs::{File, Metadata};
use std::io::{stdout, BufRead, BufReader, BufWriter};

use std::path::{Path, PathBuf};
use uucore::error::UResult;

/// Data structure to keep a handle on files to follow.
/// `last` always holds the path/key of the last file that was printed from.
/// The keys of the HashMap can point to an existing file path (normal case),
/// or stdin ("-"), or to a non existing path (--retry).
/// For existing files, all keys in the HashMap are absolute Paths.
pub struct FileHandling {
map: HashMap<PathBuf, PathData>,
last: Option<PathBuf>,
header_printer: HeaderPrinter,
}

impl FileHandling {
pub fn from(settings: &Settings) -> Self {
Self {
map: HashMap::with_capacity(settings.inputs.len()),
last: None,
header_printer: HeaderPrinter::new(settings.verbose, false),
}
}

/// Wrapper for HashMap::insert using Path::canonicalize
pub fn insert(&mut self, k: &Path, v: PathData, update_last: bool) {
let k = Self::canonicalize_path(k);
if update_last {
self.last = Some(k.to_owned());
}
let _ = self.map.insert(k, v);
}

/// Wrapper for HashMap::remove using Path::canonicalize
pub fn remove(&mut self, k: &Path) -> PathData {
self.map.remove(&Self::canonicalize_path(k)).unwrap()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is unwrap okay for these methods?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually don't think it is and tend to return an Option or Result from these methods, but I need to dig deeper through the watch module to figure this out. These functions are heavily used. I can address this issue together with refactoring watch in the next pr.

}

/// Wrapper for HashMap::get using Path::canonicalize
pub fn get(&self, k: &Path) -> &PathData {
self.map.get(&Self::canonicalize_path(k)).unwrap()
}

/// Wrapper for HashMap::get_mut using Path::canonicalize
pub fn get_mut(&mut self, k: &Path) -> &mut PathData {
self.map.get_mut(&Self::canonicalize_path(k)).unwrap()
}

/// Canonicalize `path` if it is not already an absolute path
fn canonicalize_path(path: &Path) -> PathBuf {
if path.is_relative() && !path.is_stdin() {
if let Ok(p) = path.canonicalize() {
return p;
}
}
path.to_owned()
}

pub fn get_mut_metadata(&mut self, path: &Path) -> Option<&Metadata> {
self.get_mut(path).metadata.as_ref()
}

pub fn keys(&self) -> Keys<PathBuf, PathData> {
self.map.keys()
}

pub fn contains_key(&self, k: &Path) -> bool {
self.map.contains_key(k)
}

pub fn get_last(&self) -> Option<&PathBuf> {
self.last.as_ref()
}

/// Return true if there is only stdin remaining
pub fn only_stdin_remaining(&self) -> bool {
self.map.len() == 1 && (self.map.contains_key(Path::new(text::DASH)))
}

/// Return true if there is at least one "tailable" path (or stdin) remaining
pub fn files_remaining(&self) -> bool {
for path in self.map.keys() {
if path.is_tailable() || path.is_stdin() {
return true;
}
}
false
}

/// Returns true if there are no files remaining
pub fn no_files_remaining(&self, settings: &Settings) -> bool {
self.map.is_empty() || !self.files_remaining() && !settings.retry
}

/// Set `reader` to None to indicate that `path` is not an existing file anymore.
pub fn reset_reader(&mut self, path: &Path) {
self.get_mut(path).reader = None;
}

/// Reopen the file at the monitored `path`
pub fn update_reader(&mut self, path: &Path) -> UResult<()> {
/*
BUG: If it's not necessary to reopen a file, GNU's tail calls seek to offset 0.
However we can't call seek here because `BufRead` does not implement `Seek`.
As a workaround we always reopen the file even though this might not always
be necessary.
*/
self.get_mut(path)
.reader
.replace(Box::new(BufReader::new(File::open(&path)?)));
Ok(())
}

/// Reload metadata from `path`, or `metadata`
pub fn update_metadata(&mut self, path: &Path, metadata: Option<Metadata>) {
self.get_mut(path).metadata = if metadata.is_some() {
metadata
} else {
path.metadata().ok()
};
}

/// Read new data from `path` and print it to stdout
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you document the return value here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. There's a lot of documenation missing in general. I can address this at the end of the next iteration.

pub fn tail_file(&mut self, path: &Path, verbose: bool) -> UResult<bool> {
let mut chunks = BytesChunkBuffer::new(u64::MAX);
if let Some(reader) = self.get_mut(path).reader.as_mut() {
chunks.fill(reader)?;
}
if chunks.has_data() {
if self.needs_header(path, verbose) {
let display_name = self.get(path).display_name.clone();
self.header_printer.print(display_name.as_str());
}

let stdout = stdout();
let writer = BufWriter::new(stdout.lock());
chunks.print(writer)?;

self.last.replace(path.to_owned());
self.update_metadata(path, None);
Ok(true)
} else {
Ok(false)
}
}

/// Decide if printing `path` needs a header based on when it was last printed
pub fn needs_header(&self, path: &Path, verbose: bool) -> bool {
if verbose {
if let Some(ref last) = self.last {
return !last.eq(&path);
} else {
return true;
}
}
false
}
}

/// Data structure to keep a handle on the BufReader, Metadata
/// and the display_name (header_name) of files that are being followed.
pub struct PathData {
pub reader: Option<Box<dyn BufRead>>,
pub metadata: Option<Metadata>,
pub display_name: String,
}

impl PathData {
pub fn new(
reader: Option<Box<dyn BufRead>>,
metadata: Option<Metadata>,
display_name: &str,
) -> Self {
Self {
reader,
metadata,
display_name: display_name.to_owned(),
}
}
pub fn from_other_with_path(data: Self, path: &Path) -> Self {
// Remove old reader
let old_reader = data.reader;
let reader = if old_reader.is_some() {
// Use old reader with the same file descriptor if there is one
old_reader
} else if let Ok(file) = File::open(path) {
// Open new file tail from start
Some(Box::new(BufReader::new(file)) as Box<dyn BufRead>)
} else {
// Probably file was renamed/moved or removed again
None
};

Self::new(reader, path.metadata().ok(), data.display_name.as_str())
}
}
9 changes: 9 additions & 0 deletions src/uu/tail/src/follow/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.

mod files;
mod watch;

pub use watch::{follow, WatcherService};
Loading