From 6db5f293a24dedafe3be2b8813784b66568fdef0 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Sun, 12 Jun 2022 16:01:37 +0200 Subject: [PATCH] tail: reduce CPU load for polling This reduces the CPU load for polling drastically (from ~80% down to ~5%) by removing/fixing several previous workarounds related to polling, while still passing all related GNU test-suite checks. * set Notify::PollWatcher delay to: sleep_sec/10 instead of sleep_sec/100 * set recv_timeout to sleep_sec instead of sleep_sec/100 * remove the manual polling of watched files Bugs: * fix an issue with headers to consistently pass "test_follow_name_retry_headers" and "gnu/tests/tail-2/overlay-headers.sh" Code clean-up and refactor * make fields of struct FileHandling private (and add getters/setters) to ensure that the paths are absolute and match the paths returned by Notify::Events * replace calls to "crash!" with "return USimpleError" * clean-up formatting --- src/uu/tail/src/tail.rs | 706 +++++++++++++++++++++------------------- 1 file changed, 374 insertions(+), 332 deletions(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 5329d9eecf2..6dc7447b603 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -25,6 +25,7 @@ extern crate uucore; mod chunks; mod parse; mod platform; +use crate::files::FileHandling; use chunks::ReverseChunks; use clap::{Arg, Command}; @@ -52,12 +53,12 @@ use std::os::unix::fs::MetadataExt; use std::os::unix::prelude::FileTypeExt; const ABOUT: &str = "\ - Print the last 10 lines of each FILE to standard output.\n\ - With more than one FILE, precede each with a header giving the file name.\n\ - With no FILE, or when FILE is -, read standard input.\n\ - \n\ - Mandatory arguments to long flags are mandatory for short flags too.\ - "; + Print the last 10 lines of each FILE to standard output.\n\ + With more than one FILE, precede each with a header giving the file name.\n\ + With no FILE, or when FILE is -, read standard input.\n\ + \n\ + Mandatory arguments to long flags are mandatory for short flags too.\ + "; const USAGE: &str = "{} [FLAG]... [FILE]..."; pub mod text { @@ -114,7 +115,7 @@ enum FollowMode { } #[derive(Debug, Default)] -struct Settings { +pub struct Settings { beginning: bool, follow: Option, max_unchanged_stats: u32, @@ -160,14 +161,6 @@ impl Settings { settings.use_polling = matches.is_present(options::USE_POLLING); - if settings.use_polling { - // NOTE: Value decreased to accommodate for discrepancies. Divisor chosen - // empirically in order to pass timing sensitive GNU test-suite checks. - // Without this adjustment and when polling, i.e. `---disable-inotify`, - // we're too slow to pick up every event that GNU's tail is picking up. - settings.sleep_sec /= 100; - } - if let Some(s) = matches.value_of(options::MAX_UNCHANGED_STATS) { settings.max_unchanged_stats = match s.parse::() { Ok(s) => s, @@ -302,22 +295,19 @@ fn uu_tail(mut settings: Settings) -> UResult<()> { } let mut first_header = true; - let mut files = FileHandling { - map: HashMap::with_capacity(settings.paths.len()), - last: None, - }; + let mut files = FileHandling::with_capacity(settings.paths.len()); // Do an initial tail print of each path's content. // Add `path` to `files` map if `--follow` is selected. for path in &settings.paths { - let mut path = path.to_path_buf(); - let mut display_name = path.to_path_buf(); + let mut path = path.to_owned(); + let mut display_name = path.to_owned(); // Workaround to handle redirects, e.g. `touch f && tail -f - < f` if cfg!(unix) && path.is_stdin() { display_name = PathBuf::from(text::STDIN_HEADER); if let Ok(p) = Path::new(text::DEV_STDIN).canonicalize() { - path = p.to_owned(); + path = p; } else { path = PathBuf::from(text::DEV_STDIN); } @@ -398,12 +388,13 @@ fn uu_tail(mut settings: Settings) -> UResult<()> { if settings.follow == Some(FollowMode::Descriptor) { // Insert `stdin` into `files.map` files.insert( - path.to_path_buf(), + &path, PathData { reader: Some(Box::new(reader)), metadata: None, display_name: PathBuf::from(text::STDIN_HEADER), }, + true, ); } } else { @@ -440,12 +431,13 @@ fn uu_tail(mut settings: Settings) -> UResult<()> { if settings.follow.is_some() { // Insert existing/file `path` into `files.map` files.insert( - path.canonicalize()?, + &path, PathData { reader: Some(Box::new(reader)), metadata, display_name, }, + true, ); } } @@ -466,12 +458,13 @@ fn uu_tail(mut settings: Settings) -> UResult<()> { } // Insert non-is_tailable() paths into `files.map` files.insert( - path.to_path_buf(), + &path, PathData { reader: None, metadata, display_name, }, + false, ); } } @@ -486,7 +479,7 @@ fn uu_tail(mut settings: Settings) -> UResult<()> { the input file is not a FIFO, pipe, or regular file, it is unspecified whether or not the -f option shall be ignored. */ - if files.map.is_empty() || !files.files_remaining() && !settings.retry { + if files.no_files_remaining(&settings) { if !files.only_stdin_remaining() { show_error!("{}", text::NO_FILES_REMAINING); } @@ -602,9 +595,9 @@ pub fn uu_app<'a>() -> Command<'a> { .long(options::MAX_UNCHANGED_STATS) .help( "Reopen a FILE which has not changed size after N (default 5) iterations \ - to see if it has been unlinked or renamed (this is the usual case of rotated \ - log files); This option is meaningful only when polling \ - (i.e., with --use-polling) and when --follow=name", + to see if it has been unlinked or renamed (this is the usual case of rotated \ + log files); This option is meaningful only when polling \ + (i.e., with --use-polling) and when --follow=name", ), ) .arg( @@ -674,13 +667,17 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { // https://github.com/notify-rs/notify/issues/240 let mut watcher: Box; + let watcher_config = notify::poll::PollWatcherConfig { + // NOTE: Value decreased to accommodate for discrepancies. Divisor chosen + // empirically in order to pass timing sensitive GNU test-suite checks. + // Without this adjustment and when polling, i.e. `---disable-inotify`, + // we're too slow to pick up every event that GNU's tail is picking up. + poll_interval: settings.sleep_sec / 10, + ..Default::default() + }; if settings.use_polling || RecommendedWatcher::kind() == WatcherKind::PollWatcher { settings.use_polling = true; // We have to use polling because there's no supported backend - let config = notify::poll::PollWatcherConfig { - poll_interval: settings.sleep_sec, - ..Default::default() - }; - watcher = Box::new(notify::PollWatcher::with_config(tx, config).unwrap()); + watcher = Box::new(notify::PollWatcher::with_config(tx, watcher_config).unwrap()); } else { let tx_clone = tx.clone(); match notify::RecommendedWatcher::new(tx) { @@ -695,11 +692,8 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { ); set_exit_code(1); settings.use_polling = true; - let config = notify::poll::PollWatcherConfig { - poll_interval: settings.sleep_sec, - ..Default::default() - }; - watcher = Box::new(notify::PollWatcher::with_config(tx_clone, config).unwrap()); + watcher = + Box::new(notify::PollWatcher::with_config(tx_clone, watcher_config).unwrap()); } Err(e) => return Err(USimpleError::new(1, e.to_string())), }; @@ -710,10 +704,10 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { // If `path` is not an existing file, add its parent to `Watcher`. // If there is no parent, add `path` to `orphans`. let mut orphans = Vec::new(); - for path in files.map.keys() { + for path in files.keys() { if path.is_tailable() { // TODO: [2022-05; jhscheer] also add `file` (not just parent) to please - // "gnu/tests/tail-2/inotify-rotate-resourced.sh" because it is looking for + // "gnu/tests/tail-2/inotify-rotate-resourced.sh" because it is looking // for syscalls: 2x "inotify_add_watch" and 1x "inotify_rm_watch" let path = path.watchable(settings); watcher @@ -721,7 +715,7 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { .unwrap(); } else if settings.follow.is_some() && settings.retry { if path.is_orphan() { - orphans.push(path.to_path_buf()); + orphans.push(path.to_owned()); } else { let parent = path.parent().unwrap(); watcher @@ -749,7 +743,7 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { if settings.retry && settings.follow == Some(FollowMode::Name) { for new_path in &orphans { if new_path.exists() { - let pd = files.map.get(new_path).unwrap(); + let pd = files.get(new_path); let md = new_path.metadata().unwrap(); if md.is_tailable() && pd.reader.is_none() { show_error!( @@ -772,54 +766,31 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { } } - // Poll all watched files manually to not miss changes due to timing - // conflicts with `Notify::PollWatcher`. - // NOTE: This is a workaround because PollWatcher tends to miss events. - // e.g. `echo "X1" > missing ; sleep 0.1 ; echo "X" > missing ;` should trigger a - // truncation event, but PollWatcher doesn't recognize it. - // This is relevant to pass, e.g.: "gnu/tests/tail-2/truncate.sh" - // TODO: [2022-06; jhscheer] maybe use `--max-unchanged-stats` here to reduce fstat calls? - if settings.use_polling && settings.follow.is_some() { - for path in &files - .map - .keys() - .filter(|p| p.is_tailable()) - .map(|p| p.to_path_buf()) - .collect::>() - { - if let Ok(new_md) = path.metadata() { - let pd = files.map.get(path).unwrap(); - if let Some(old_md) = &pd.metadata { - if old_md.is_tailable() - && new_md.is_tailable() - && old_md.got_truncated(&new_md)? - { - show_error!("{}: file truncated", pd.display_name.display()); - files.update_metadata(path, Some(new_md)); - files.update_reader(path)?; - } - } - } - } - } - - // with -f, sleep for approximately N seconds (default 1.0) between iterations; + // With -f, sleep for approximately N seconds (default 1.0) between iterations; + // We wake up if Notify sends an Event or if we wait more than `sleep_sec`. let rx_result = rx.recv_timeout(settings.sleep_sec); if rx_result.is_ok() { _event_counter += 1; _timeout_counter = 0; } + let mut paths = vec![]; // Paths worth checking for new content to print match rx_result { Ok(Ok(event)) => { - handle_event(&event, files, settings, &mut watcher, &mut orphans)?; + if let Some(event_path) = event.paths.first() { + assert!(event_path.is_absolute()); + if files.contains_key(event_path) { + // Handle Event if it is about a path that we are monitoring + paths = handle_event(&event, files, settings, &mut watcher, &mut orphans)?; + } + } } Ok(Err(notify::Error { kind: notify::ErrorKind::Io(ref e), paths, })) if e.kind() == std::io::ErrorKind::NotFound => { if let Some(event_path) = paths.first() { - if files.map.contains_key(event_path) { + if files.contains_key(event_path) { let _ = watcher.unwatch(event_path); } } @@ -827,17 +798,29 @@ fn follow(files: &mut FileHandling, settings: &mut Settings) -> UResult<()> { Ok(Err(notify::Error { kind: notify::ErrorKind::MaxFilesWatch, .. - })) => crash!(1, "{} resources exhausted", text::BACKEND), - Ok(Err(e)) => crash!(1, "{:?}", e), + })) => { + return Err(USimpleError::new( + 1, + format!("{} resources exhausted", text::BACKEND), + )) + } + Ok(Err(e)) => return Err(USimpleError::new(1, format!("NotifyError: {}", e))), Err(mpsc::RecvTimeoutError::Timeout) => { _timeout_counter += 1; } - Err(e) => crash!(1, "RecvError: {:?}", e), + Err(e) => return Err(USimpleError::new(1, format!("RecvTimeoutError: {}", e))), + } + + if settings.use_polling && settings.follow.is_some() { + // Consider all files to potentially have new content. + // This is a workaround because `Notify::PollWatcher` + // does not recognize the "renaming" of files. + paths = files.keys().cloned().collect::>(); } // main print loop - for path in files.map.keys().cloned().collect::>() { - read_some = files.tail_file(&path, settings.verbose)?; + for path in &paths { + read_some = files.tail_file(path, settings.verbose)?; } if !read_some && settings.pid != 0 && process.is_dead() { @@ -868,58 +851,55 @@ fn handle_event( settings: &Settings, watcher: &mut Box, orphans: &mut Vec, -) -> UResult<()> { +) -> UResult> { use notify::event::*; - if let Some(event_path) = event.paths.first() { - if files.map.contains_key(event_path) { - let display_name = files - .map - .get(event_path) - .unwrap() - .display_name - .to_path_buf(); - - match event.kind { - EventKind::Modify(ModifyKind::Metadata(MetadataKind::Any | MetadataKind::WriteTime)) - // | EventKind::Access(AccessKind::Close(AccessMode::Write)) - | EventKind::Create(CreateKind::File | CreateKind::Folder | CreateKind::Any) - | EventKind::Modify(ModifyKind::Data(DataChange::Any)) - | EventKind::Modify(ModifyKind::Name(RenameMode::To)) => { - if let Ok(new_md) = event_path.metadata() { - let is_tailable = new_md.is_tailable(); - let pd = files.map.get(event_path).unwrap(); - if let Some(old_md) = &pd.metadata { - if is_tailable { - // We resume tracking from the start of the file, - // assuming it has been truncated to 0. This mimics GNU's `tail` - // behavior and is the usual truncation operation for log files. - if !old_md.is_tailable() { - show_error!( "{} has become accessible", display_name.quote()); - files.update_reader(event_path)?; - } else if pd.reader.is_none() { - show_error!( "{} has appeared; following new file", display_name.quote()); - files.update_reader(event_path)?; - } else if event.kind == EventKind::Modify(ModifyKind::Name(RenameMode::To)) { - show_error!( "{} has been replaced; following new file", display_name.quote()); - files.update_reader(event_path)?; - } else if old_md.got_truncated(&new_md)? { - show_error!("{}: file truncated", display_name.display()); - files.update_reader(event_path)?; - } - } else if !is_tailable && old_md.is_tailable() { - if pd.reader.is_some() { - files.reset_reader(event_path); - } else { - show_error!( - "{} has been replaced with an untailable file", - display_name.quote() - ); - } + let event_path = event.paths.first().unwrap(); + let display_name = files.get_display_name(event_path); + let mut paths: Vec = vec![]; + + match event.kind { + EventKind::Modify(ModifyKind::Metadata(MetadataKind::Any | MetadataKind::WriteTime)) + // | EventKind::Access(AccessKind::Close(AccessMode::Write)) + | EventKind::Create(CreateKind::File | CreateKind::Folder | CreateKind::Any) + | EventKind::Modify(ModifyKind::Data(DataChange::Any)) + | EventKind::Modify(ModifyKind::Name(RenameMode::To)) => { + if let Ok(new_md) = event_path.metadata() { + let is_tailable = new_md.is_tailable(); + let pd = files.get(event_path); + if let Some(old_md) = &pd.metadata { + if is_tailable { + // We resume tracking from the start of the file, + // assuming it has been truncated to 0. This mimics GNU's `tail` + // behavior and is the usual truncation operation for log files. + if !old_md.is_tailable() { + show_error!( "{} has become accessible", display_name.quote()); + files.update_reader(event_path)?; + } else if pd.reader.is_none() { + show_error!( "{} has appeared; following new file", display_name.quote()); + files.update_reader(event_path)?; + } else if event.kind == EventKind::Modify(ModifyKind::Name(RenameMode::To)) { + show_error!( "{} has been replaced; following new file", display_name.quote()); + files.update_reader(event_path)?; + } else if old_md.got_truncated(&new_md)? { + show_error!("{}: file truncated", display_name.display()); + files.update_reader(event_path)?; + } + paths.push(event_path.to_owned()); + } else if !is_tailable && old_md.is_tailable() { + if pd.reader.is_some() { + files.reset_reader(event_path); + } else { + show_error!( + "{} has been replaced with an untailable file", + display_name.quote() + ); } - } else if is_tailable { + } + } else if is_tailable { show_error!( "{} has appeared; following new file", display_name.quote()); files.update_reader(event_path)?; + paths.push(event_path.to_owned()); } else if settings.retry { if settings.follow == Some(FollowMode::Descriptor) { show_error!( @@ -927,9 +907,9 @@ fn handle_event( display_name.quote() ); let _ = watcher.unwatch(event_path); - files.map.remove(event_path).unwrap(); - if files.map.is_empty() { - crash!(1, "{}", text::NO_FILES_REMAINING); + files.remove(event_path); + if files.no_files_remaining(settings) { + return Err(USimpleError::new(1, text::NO_FILES_REMAINING)); } } else { show_error!( @@ -938,245 +918,308 @@ fn handle_event( ); } } - files.update_metadata(event_path, Some(new_md)); - } + files.update_metadata(event_path, Some(new_md)); } - EventKind::Remove(RemoveKind::File | RemoveKind::Any) - // | EventKind::Modify(ModifyKind::Name(RenameMode::Any)) - | EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { - if settings.follow == Some(FollowMode::Name) { - if settings.retry { - if let Some(old_md) = &files.map.get_mut(event_path).unwrap().metadata { - if old_md.is_tailable() { - show_error!( - "{} has become inaccessible: {}", - display_name.quote(), - text::NO_SUCH_FILE - ); - } - } - if event_path.is_orphan() && !orphans.contains(event_path) { - show_error!("directory containing watched file was removed"); + } + EventKind::Remove(RemoveKind::File | RemoveKind::Any) + // | EventKind::Modify(ModifyKind::Name(RenameMode::Any)) + | EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { + if settings.follow == Some(FollowMode::Name) { + if settings.retry { + if let Some(old_md) = files.get_mut_metadata(event_path) { + if old_md.is_tailable() { show_error!( - "{} cannot be used, reverting to polling", - text::BACKEND + "{} has become inaccessible: {}", + display_name.quote(), + text::NO_SUCH_FILE ); - orphans.push(event_path.to_path_buf()); - let _ = watcher.unwatch(event_path); - } - } else { - show_error!("{}: {}", display_name.display(), text::NO_SUCH_FILE); - if !files.files_remaining() && settings.use_polling { - crash!(1, "{}", text::NO_FILES_REMAINING); } } - files.reset_reader(event_path); - } else if settings.follow == Some(FollowMode::Descriptor) && settings.retry { - // --retry only effective for the initial open - let _ = watcher.unwatch(event_path); - files.map.remove(event_path).unwrap(); - } else if settings.use_polling && event.kind == EventKind::Remove(RemoveKind::Any) { - // BUG: - // The watched file was removed. Since we're using Polling, this - // could be a rename. We can't tell because `notify::PollWatcher` doesn't - // recognize renames properly. - // Ideally we want to call seek to offset 0 on the file handle. - // But because we only have access to `PathData::reader` as `BufRead`, - // we cannot seek to 0 with `BufReader::seek_relative`. - // Also because we don't have the new name, we cannot work around this - // by simply reopening the file. - } - } - EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => { - // NOTE: For `tail -f a`, keep tracking additions to b after `mv a b` - // (gnu/tests/tail-2/descriptor-vs-rename.sh) - // NOTE: The File/BufReader doesn't need to be updated. - // However, we need to update our `files.map`. - // This can only be done for inotify, because this EventKind does not - // trigger for the PollWatcher. - // BUG: As a result, there's a bug if polling is used: - // $ tail -f file_a ---disable-inotify - // $ mv file_a file_b - // $ echo A >> file_b - // $ echo A >> file_a - // The last append to file_a is printed, however this shouldn't be because - // after the "mv" tail should only follow "file_b". - // TODO: [2022-05; jhscheer] add test for this bug - - if settings.follow == Some(FollowMode::Descriptor) { - let new_path = event.paths.last().unwrap().canonicalize()?; - // Open new file and seek to End: - let mut file = File::open(&new_path)?; - file.seek(SeekFrom::End(0))?; - // Add new reader but keep old display name - files.map.insert( - new_path.to_owned(), - PathData { - metadata: file.metadata().ok(), - reader: Some(Box::new(BufReader::new(file))), - display_name, // mimic GNU's tail and show old name in header - }, - ); - // Remove old reader - files.map.remove(event_path).unwrap(); - if files.last.as_ref().unwrap() == event_path { - files.last = Some(new_path.to_owned()); + if event_path.is_orphan() && !orphans.contains(event_path) { + show_error!("directory containing watched file was removed"); + show_error!( + "{} cannot be used, reverting to polling", + text::BACKEND + ); + orphans.push(event_path.to_owned()); + let _ = watcher.unwatch(event_path); + } + } else { + show_error!("{}: {}", display_name.display(), text::NO_SUCH_FILE); + if !files.files_remaining() && settings.use_polling { + // NOTE: GNU's tail exits here for `---disable-inotify` + return Err(USimpleError::new(1, text::NO_FILES_REMAINING)); } - // Unwatch old path and watch new path - let _ = watcher.unwatch(event_path); - let new_path = new_path.watchable(settings); - watcher - .watch( - &new_path.canonicalize()?, - RecursiveMode::NonRecursive, - ) - .unwrap(); } + files.reset_reader(event_path); + } else if settings.follow == Some(FollowMode::Descriptor) && settings.retry { + // --retry only effective for the initial open + let _ = watcher.unwatch(event_path); + files.remove(event_path); + } else if settings.use_polling && event.kind == EventKind::Remove(RemoveKind::Any) { + // BUG: + // The watched file was removed. Since we're using Polling, this + // could be a rename. We can't tell because `notify::PollWatcher` doesn't + // recognize renames properly. + // Ideally we want to call seek to offset 0 on the file handle. + // But because we only have access to `PathData::reader` as `BufRead`, + // we cannot seek to 0 with `BufReader::seek_relative`. + // Also because we don't have the new name, we cannot work around this + // by simply reopening the file. } - _ => {} + } + EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => { + // NOTE: For `tail -f a`, keep tracking additions to b after `mv a b` + // (gnu/tests/tail-2/descriptor-vs-rename.sh) + // NOTE: The File/BufReader doesn't need to be updated. + // However, we need to update our `files.map`. + // This can only be done for inotify, because this EventKind does not + // trigger for the PollWatcher. + // BUG: As a result, there's a bug if polling is used: + // $ tail -f file_a ---disable-inotify + // $ mv file_a file_b + // $ echo A >> file_b + // $ echo A >> file_a + // The last append to file_a is printed, however this shouldn't be because + // after the "mv" tail should only follow "file_b". + // TODO: [2022-05; jhscheer] add test for this bug + + if settings.follow == Some(FollowMode::Descriptor) { + let new_path = event.paths.last().unwrap(); + assert!(new_path.is_absolute()); + paths.push(new_path.to_owned()); // TODO: [2022-06; jhscheer] still necessary? + // Open new file and seek to End: + let mut file = File::open(&new_path)?; + file.seek(SeekFrom::End(0))?; + // Add new reader but keep old display name + files.insert( + new_path, + PathData { + metadata: file.metadata().ok(), + reader: Some(Box::new(BufReader::new(file))), + display_name, // mimic GNU's tail and show old name in header + }, + files.get_last().unwrap() == event_path + ); + // Remove old reader + files.remove(event_path); + // Unwatch old path and watch new path + let _ = watcher.unwatch(event_path); + let new_path = new_path.watchable(settings); + watcher + .watch( + &new_path.canonicalize()?, + RecursiveMode::NonRecursive, + ) + .unwrap(); } } + _ => {} } - Ok(()) + Ok(paths) } /// Data structure to keep a handle on the BufReader, Metadata /// and the display_name (header_name) of files that are being followed. -struct PathData { +pub struct PathData { reader: Option>, metadata: Option, display_name: PathBuf, // the path as provided by user input, used for headers } -/// Data structure to keep a handle on files to follow. -/// `last` always holds the path/key of the last file that was printed from. -/// The keys of the HashMap can point to an existing file path (normal case), -/// or stdin ("-"), or to a non existing path (--retry). -/// With the exception of stdin, all keys in the HashMap are absolute Paths. -struct FileHandling { - map: HashMap, - last: Option, -} - -impl FileHandling { - /// Insert new `PathData` into the HashMap - fn insert(&mut self, k: PathBuf, v: PathData) -> Option { - self.last = Some(k.to_owned()); - self.map.insert(k, v) +mod files { + use super::*; + use std::collections::hash_map::Keys; + + /// Data structure to keep a handle on files to follow. + /// `last` always holds the path/key of the last file that was printed from. + /// The keys of the HashMap can point to an existing file path (normal case), + /// or stdin ("-"), or to a non existing path (--retry). + /// For existing files, all keys in the HashMap are absolute Paths. + pub struct FileHandling { + map: HashMap, + last: Option, } - /// Return true if there is only stdin remaining - fn only_stdin_remaining(&self) -> bool { - self.map.len() == 1 && (self.map.contains_key(Path::new(text::DASH))) - } + impl FileHandling { + /// Creates an empty `FileHandling` with the specified capacity + pub fn with_capacity(n: usize) -> Self { + Self { + map: HashMap::with_capacity(n), + last: None, + } + } - /// Return true if there is at least one "tailable" path (or stdin) remaining - fn files_remaining(&self) -> bool { - for path in self.map.keys() { - if path.is_tailable() || path.is_stdin() { - return true; + /// Wrapper for HashMap::insert using Path::canonicalize + pub fn insert(&mut self, k: &Path, v: PathData, update_last: bool) { + let k = Self::canonicalize_path(k); + if update_last { + self.last = Some(k.to_owned()); } + let _ = self.map.insert(k, v); } - false - } - /// Set `reader` to None to indicate that `path` is not an existing file anymore. - fn reset_reader(&mut self, path: &Path) { - assert!(self.map.contains_key(path)); - self.map.get_mut(path).unwrap().reader = None; - } + /// Wrapper for HashMap::remove using Path::canonicalize + pub fn remove(&mut self, k: &Path) { + self.map.remove(&Self::canonicalize_path(k)).unwrap(); + } - /// Reopen the file at the monitored `path` - fn update_reader(&mut self, path: &Path) -> UResult<()> { - assert!(self.map.contains_key(path)); - // BUG: - // If it's not necessary to reopen a file, GNU's tail calls seek to offset 0. - // However we can't call seek here because `BufRead` does not implement `Seek`. - // As a workaround we always reopen the file even though this might not always - // be necessary. - self.map.get_mut(path).unwrap().reader = Some(Box::new(BufReader::new(File::open(&path)?))); - Ok(()) - } + /// Wrapper for HashMap::get using Path::canonicalize + pub fn get(&self, k: &Path) -> &PathData { + self.map.get(&Self::canonicalize_path(k)).unwrap() + } - /// Reload metadata from `path`, or `metadata` - fn update_metadata(&mut self, path: &Path, metadata: Option) { - assert!(self.map.contains_key(path)); - self.map.get_mut(path).unwrap().metadata = if metadata.is_some() { - metadata - } else { - path.metadata().ok() - }; - } + // Canonicalize `path` if it is not already an absolute path + fn canonicalize_path(path: &Path) -> PathBuf { + if path.is_relative() && !path.is_stdin() { + if let Ok(p) = path.canonicalize() { + return p; + } + } + path.to_owned() + } - /// Read `path` from the current seek position forward - fn read_file(&mut self, path: &Path, buffer: &mut Vec) -> UResult { - assert!(self.map.contains_key(path)); - let mut read_some = false; - let pd = self.map.get_mut(path).unwrap().reader.as_mut(); - if let Some(reader) = pd { - loop { - match reader.read_until(b'\n', buffer) { - Ok(0) => break, - Ok(_) => { - read_some = true; - } - Err(err) => return Err(USimpleError::new(1, err.to_string())), + pub fn get_display_name(&self, path: &Path) -> PathBuf { + self.map.get(path).unwrap().display_name.to_owned() + } + + pub fn get_mut_metadata(&mut self, path: &Path) -> Option<&Metadata> { + self.map.get_mut(path).unwrap().metadata.as_ref() + } + + pub fn keys(&self) -> Keys { + self.map.keys() + } + + pub fn contains_key(&self, k: &Path) -> bool { + self.map.contains_key(k) + } + + pub fn get_last(&self) -> Option<&PathBuf> { + self.last.as_ref() + } + + /// Return true if there is only stdin remaining + pub fn only_stdin_remaining(&self) -> bool { + self.map.len() == 1 && (self.map.contains_key(Path::new(text::DASH))) + } + + /// Return true if there is at least one "tailable" path (or stdin) remaining + pub fn files_remaining(&self) -> bool { + for path in self.map.keys() { + if path.is_tailable() || path.is_stdin() { + return true; } } + false } - Ok(read_some) - } - /// Print `buffer` to stdout - fn print_file(&self, buffer: &[u8]) -> UResult<()> { - let mut stdout = stdout(); - stdout - .write_all(buffer) - .map_err_context(|| String::from("write error"))?; - Ok(()) - } + /// Returns true if there are no files remaining + pub fn no_files_remaining(&self, settings: &Settings) -> bool { + self.map.is_empty() || !self.files_remaining() && !settings.retry + } + + /// Set `reader` to None to indicate that `path` is not an existing file anymore. + pub fn reset_reader(&mut self, path: &Path) { + assert!(self.map.contains_key(path)); + self.map.get_mut(path).unwrap().reader = None; + } + + /// Reopen the file at the monitored `path` + pub fn update_reader(&mut self, path: &Path) -> UResult<()> { + assert!(self.map.contains_key(path)); + // BUG: + // If it's not necessary to reopen a file, GNU's tail calls seek to offset 0. + // However we can't call seek here because `BufRead` does not implement `Seek`. + // As a workaround we always reopen the file even though this might not always + // be necessary. + self.map.get_mut(path).unwrap().reader = + Some(Box::new(BufReader::new(File::open(&path)?))); + Ok(()) + } + + /// Reload metadata from `path`, or `metadata` + pub fn update_metadata(&mut self, path: &Path, metadata: Option) { + assert!(self.map.contains_key(path)); + self.map.get_mut(path).unwrap().metadata = if metadata.is_some() { + metadata + } else { + path.metadata().ok() + }; + } - /// Read new data from `path` and print it to stdout - fn tail_file(&mut self, path: &Path, verbose: bool) -> UResult { - let mut buffer = vec![]; - let read_some = self.read_file(path, &mut buffer)?; - if read_some { - if self.needs_header(path, verbose) { - self.print_header(path, true); + /// Read `path` from the current seek position forward + pub fn read_file(&mut self, path: &Path, buffer: &mut Vec) -> UResult { + assert!(self.map.contains_key(path)); + let mut read_some = false; + let pd = self.map.get_mut(path).unwrap().reader.as_mut(); + if let Some(reader) = pd { + loop { + match reader.read_until(b'\n', buffer) { + Ok(0) => break, + Ok(_) => { + read_some = true; + } + Err(err) => return Err(USimpleError::new(1, err.to_string())), + } + } } - self.print_file(&buffer)?; + Ok(read_some) + } + + /// Print `buffer` to stdout + pub fn print_file(&self, buffer: &[u8]) -> UResult<()> { + let mut stdout = stdout(); + stdout + .write_all(buffer) + .map_err_context(|| String::from("write error"))?; + Ok(()) + } + + /// Read new data from `path` and print it to stdout + pub fn tail_file(&mut self, path: &Path, verbose: bool) -> UResult { + let mut buffer = vec![]; + let read_some = self.read_file(path, &mut buffer)?; + if read_some { + if self.needs_header(path, verbose) { + self.print_header(path, true); + } + self.print_file(&buffer)?; - self.last = Some(path.to_path_buf()); // TODO: [2022-05; jhscheer] add test for this - self.update_metadata(path, None); + self.last = Some(path.to_owned()); + self.update_metadata(path, None); + } + Ok(read_some) } - Ok(read_some) - } - /// Decide if printing `path` needs a header based on when it was last printed - fn needs_header(&self, path: &Path, verbose: bool) -> bool { - if verbose { - if let Some(ref last) = self.last { - return !last.eq(&path); + /// Decide if printing `path` needs a header based on when it was last printed + pub fn needs_header(&self, path: &Path, verbose: bool) -> bool { + if verbose { + if let Some(ref last) = self.last { + return !last.eq(&path); + } else { + return true; + } } + false } - false - } - /// Print header for `path` to stdout - fn print_header(&self, path: &Path, needs_newline: bool) { - println!( - "{}==> {} <==", - if needs_newline { "\n" } else { "" }, - self.display_name(path) - ); - } + /// Print header for `path` to stdout + pub fn print_header(&self, path: &Path, needs_newline: bool) { + println!( + "{}==> {} <==", + if needs_newline { "\n" } else { "" }, + self.display_name(path) + ); + } - /// Wrapper for `PathData::display_name` - fn display_name(&self, path: &Path) -> String { - if let Some(path) = self.map.get(path) { - path.display_name.display().to_string() - } else { - path.display().to_string() + /// Wrapper for `PathData::display_name` + pub fn display_name(&self, path: &Path) -> String { + if let Some(path) = self.map.get(path) { + path.display_name.display().to_string() + } else { + path.display().to_string() + } } } } @@ -1442,7 +1485,6 @@ pub fn stdin_is_bad_fd() -> bool { } trait FileExtTail { - // clippy complains, but it looks like a false positive #[allow(clippy::wrong_self_convention)] fn is_seekable(&mut self) -> bool; } @@ -1537,12 +1579,12 @@ impl PathExtTail for Path { }); // TODO: [2021-10; jhscheer] add test for this - "cannot watch parent directory" if parent.is_dir() { - parent.to_path_buf() + parent.to_owned() } else { PathBuf::from(".") } } else { - self.to_path_buf() + self.to_owned() } } }