From 21684bbdfdc54b18daa68f623afc2b0be6718c72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Thu, 22 Sep 2022 15:33:14 +0800 Subject: [PATCH] Fix: potential inconsistency when installing snapshot The conflicting logs that are before `snapshot_meta.last_log_id` should be deleted before installing a snapshot. Otherwise there is chance the snapshot is installed but conflicting logs are left in the store, when a node crashes. --- openraft/src/core/append_entries.rs | 2 +- openraft/src/core/install_snapshot.rs | 29 ++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/openraft/src/core/append_entries.rs b/openraft/src/core/append_entries.rs index 00731d5e8..2058fc63d 100644 --- a/openraft/src/core/append_entries.rs +++ b/openraft/src/core/append_entries.rs @@ -110,7 +110,7 @@ impl, S: RaftStorage> Ra } #[tracing::instrument(level = "debug", skip(self))] - async fn delete_conflict_logs_since(&mut self, start: LogId) -> Result<(), StorageError> { + pub(crate) async fn delete_conflict_logs_since(&mut self, start: LogId) -> Result<(), StorageError> { // TODO(xp): add a StorageAdapter to provide auxiliary APIs. // e.g.: // - extract and manage membership config. diff --git a/openraft/src/core/install_snapshot.rs b/openraft/src/core/install_snapshot.rs index d71fd8f65..2ea72f1e6 100644 --- a/openraft/src/core/install_snapshot.rs +++ b/openraft/src/core/install_snapshot.rs @@ -16,6 +16,7 @@ use crate::AppData; use crate::AppDataResponse; use crate::ErrorSubject; use crate::ErrorVerb; +use crate::LogIdOptionExt; use crate::MessageSummary; use crate::RaftNetwork; use crate::RaftStorage; @@ -237,6 +238,32 @@ impl, S: RaftStorage> Ra return Ok(()); } + if let Some(last) = req.meta.last_log_id { + let idx = last.index; + let matches = { + let logs = self.storage.try_get_log_entries(idx..=idx).await?; + if let Some(ent) = logs.first() { + Some(ent.log_id) == req.meta.last_log_id + } else { + // no log entry, consider it unmatched. + false + } + }; + + // The log entry at snapshot_meta.last_log_id.index conflicts with the leaders' + // We have to delete all conflicting logs before installing snapshot. + // See: [snapshot-replication](https://datafuselabs.github.io/openraft/replication.html#snapshot-replication) + if !matches { + // Delete all non-committed log entries. + // It is safe: + let x = StorageHelper::new(&self.storage).get_log_id(self.last_applied.next_index()).await; + if let Ok(log_id) = x { + self.delete_conflict_logs_since(log_id).await?; + } + // else: no next log id, ignore + } + } + let changes = self.storage.install_snapshot(&req.meta, snapshot).await?; tracing::info!("update after install-snapshot: {:?}", changes); @@ -247,7 +274,7 @@ impl, S: RaftStorage> Ra let last_applied = changes.last_applied; - // Applied logs are not needed. + // Applied logs are not needed. Purge them or there may be a hole in the log. if let Some(last) = &last_applied { purge_applied_logs(self.storage.clone(), last, self.config.max_applied_log_to_keep).await?; }