From 578f7972f273f6ca5756e66f0c9f103e44ae4eac Mon Sep 17 00:00:00 2001 From: David Alves Date: Tue, 22 Aug 2023 15:01:10 -0700 Subject: [PATCH] Allow to obtain the number of pending checkpoints This is a measure of backpressure: if the number of pending checkpoints grows too large, it means we're falling behind. This also makes it so that we panic!() when a checkpoint fails. It's important to fail here because most of the time we don't know how to recover from a checkpoint having failed. --- src/lib.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a5bc80a..af309fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -269,6 +269,12 @@ where } } + /// Returns the number of checkpoints that are pending. + #[must_use] + pub fn pending_checkpoints(&self) -> usize { + self.data.checkpoint_sender.len() + } + fn reclaim( &self, file: LogFile, @@ -359,8 +365,11 @@ where drop(writer); let mut manager = wal.data.manager.lock(); if let Err(error) = manager.checkpoint_to(entry_id, &mut reader, &wal) { - error!("Checkpointer failed with error: {error:?}. Skipping checkpoint"); - continue; + let message = format!( + "Fatal Error: Checkpointer failed with error: {error:?}. Cannot proceed." + ); + error!("{}", message); + panic!("{}", message); } writer = file_to_checkpoint.lock(); Some(entry_id)