diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 002005f1809..fef042beee6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -101,6 +101,10 @@ jobs:
           RUSTFLAGS="-C link-dead-code" cargo build --verbose --color always --features rpc-client
           RUSTFLAGS="-C link-dead-code" cargo build --verbose --color always --features rpc-client,rest-client
           RUSTFLAGS="-C link-dead-code" cargo build --verbose --color always --features rpc-client,rest-client,tokio
+      - name: Test backtrace-debug builds on Rust ${{ matrix.toolchain }}
+        if: "matrix.build-no-std"
+        run: |
+          cd lightning && cargo test --verbose --color always --features backtrace
       - name: Test on Rust ${{ matrix.toolchain }} with net-tokio
         if: "matrix.build-net-tokio && !matrix.coverage"
         run: cargo test --verbose --color always
diff --git a/lightning/Cargo.toml b/lightning/Cargo.toml
index 445e9223443..55ee177996c 100644
--- a/lightning/Cargo.toml
+++ b/lightning/Cargo.toml
@@ -39,6 +39,7 @@ secp256k1 = { version = "0.20.2", default-features = false, features = ["alloc"]
 hashbrown = { version = "0.11", optional = true }
 hex = { version = "0.3", optional = true }
 regex = { version = "0.1.80", optional = true }
+backtrace = { version = "0.3", optional = true }
 
 core2 = { version = "0.3.0", optional = true, default-features = false }
diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs
index 9db666bb03d..05f88329176 100644
--- a/lightning/src/chain/chainmonitor.rs
+++ b/lightning/src/chain/chainmonitor.rs
@@ -920,9 +920,10 @@ mod tests {
 				merkle_root: Default::default() };
 			nodes[0].chain_monitor.chain_monitor.best_block_updated(&latest_header, nodes[0].best_block_info().1 + LATENCY_GRACE_PERIOD_BLOCKS);
 		} else {
-			for (funding_outpoint, update_ids) in chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().iter() {
+			let persistences = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clone();
+			for (funding_outpoint, update_ids) in persistences {
 				for update_id in update_ids {
-					nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(*funding_outpoint, *update_id).unwrap();
+					nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_outpoint, update_id).unwrap();
 				}
 			}
 		}
diff --git a/lightning/src/debug_sync.rs b/lightning/src/debug_sync.rs
new file mode 100644
index 00000000000..7ee5ee521bc
--- /dev/null
+++ b/lightning/src/debug_sync.rs
@@ -0,0 +1,213 @@
+pub use ::alloc::sync::Arc;
+use core::ops::{Deref, DerefMut};
+use core::time::Duration;
+
+use std::collections::HashSet;
+use std::cell::RefCell;
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use std::sync::Mutex as StdMutex;
+use std::sync::MutexGuard as StdMutexGuard;
+use std::sync::RwLock as StdRwLock;
+use std::sync::RwLockReadGuard as StdRwLockReadGuard;
+use std::sync::RwLockWriteGuard as StdRwLockWriteGuard;
+use std::sync::Condvar as StdCondvar;
+
+#[cfg(feature = "backtrace")]
+use backtrace::Backtrace;
+
+pub type LockResult<Guard> = Result<Guard, ()>;
+
+pub struct Condvar {
+	inner: StdCondvar,
+}
+
+impl Condvar {
+	pub fn new() -> Condvar {
+		Condvar { inner: StdCondvar::new() }
+	}
+
+	pub fn wait<'a, T>(&'a self, guard: MutexGuard<'a, T>) -> LockResult<MutexGuard<'a, T>> {
+		let mutex: &'a Mutex<T> = guard.mutex;
+		self.inner.wait(guard.into_inner()).map(|lock| MutexGuard { mutex, lock }).map_err(|_| ())
+	}
+
+	#[allow(unused)]
+	pub fn wait_timeout<'a, T>(&'a self, guard: MutexGuard<'a, T>, dur: Duration) -> LockResult<(MutexGuard<'a, T>, ())> {
+		let mutex = guard.mutex;
+		self.inner.wait_timeout(guard.into_inner(), dur).map(|(lock, _)| (MutexGuard { mutex, lock }, ())).map_err(|_| ())
+	}
+
+	pub fn notify_all(&self) { self.inner.notify_all(); }
+}
+
+thread_local! {
+	/// We track the set of locks currently held by a reference to their `MutexMetadata`
+	static MUTEXES_HELD: RefCell<HashSet<Arc<MutexMetadata>>> = RefCell::new(HashSet::new());
+}
+static MUTEX_IDX: AtomicUsize = AtomicUsize::new(0);
+
+/// Metadata about a single mutex, by id, the set of things locked-before it, and the backtrace of
+/// when the Mutex itself was constructed.
+struct MutexMetadata {
+	mutex_idx: u64,
+	locked_before: StdMutex<HashSet<Arc<MutexMetadata>>>,
+	#[cfg(feature = "backtrace")]
+	mutex_construction_bt: Backtrace,
+}
+impl PartialEq for MutexMetadata {
+	fn eq(&self, o: &MutexMetadata) -> bool { self.mutex_idx == o.mutex_idx }
+}
+impl Eq for MutexMetadata {}
+impl std::hash::Hash for MutexMetadata {
+	fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) { hasher.write_u64(self.mutex_idx); }
+}
+
+pub struct Mutex<T: Sized> {
+	inner: StdMutex<T>,
+	deps: Arc<MutexMetadata>,
+}
+
+#[must_use = "if unused the Mutex will immediately unlock"]
+pub struct MutexGuard<'a, T: Sized + 'a> {
+	mutex: &'a Mutex<T>,
+	lock: StdMutexGuard<'a, T>,
+}
+
+impl<'a, T: Sized> MutexGuard<'a, T> {
+	fn into_inner(self) -> StdMutexGuard<'a, T> {
+		// Somewhat unclear why we cannot move out of self.lock, but doing so gets E0509.
+		unsafe {
+			let v: StdMutexGuard<'a, T> = std::ptr::read(&self.lock);
+			std::mem::forget(self);
+			v
+		}
+	}
+}
+
+impl<T: Sized> Drop for MutexGuard<'_, T> {
+	fn drop(&mut self) {
+		MUTEXES_HELD.with(|held| {
+			held.borrow_mut().remove(&self.mutex.deps);
+		});
+	}
+}
+
+impl<T: Sized> Deref for MutexGuard<'_, T> {
+	type Target = T;
+
+	fn deref(&self) -> &T {
+		&self.lock.deref()
+	}
+}
+
+impl<T: Sized> DerefMut for MutexGuard<'_, T> {
+	fn deref_mut(&mut self) -> &mut T {
+		self.lock.deref_mut()
+	}
+}
+
+impl<T> Mutex<T> {
+	pub fn new(inner: T) -> Mutex<T> {
+		Mutex {
+			inner: StdMutex::new(inner),
+			deps: Arc::new(MutexMetadata {
+				locked_before: StdMutex::new(HashSet::new()),
+				mutex_idx: MUTEX_IDX.fetch_add(1, Ordering::Relaxed) as u64,
+				#[cfg(feature = "backtrace")]
+				mutex_construction_bt: Backtrace::new(),
+			}),
+		}
+	}
+
+	pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
+		MUTEXES_HELD.with(|held| {
+			// For each mutex which is currently locked, check that no mutex's locked-before
+			// set includes the mutex we're about to lock, which would imply a lockorder
+			// inversion.
+			for locked in held.borrow().iter() {
+				for locked_dep in locked.locked_before.lock().unwrap().iter() {
+					if *locked_dep == self.deps {
+						#[cfg(feature = "backtrace")]
+						panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\n{:?}", locked.mutex_construction_bt);
+						#[cfg(not(feature = "backtrace"))]
+						panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
+					}
+				}
+				// Insert any already-held mutexes in our locked-before set.
+				self.deps.locked_before.lock().unwrap().insert(Arc::clone(locked));
+			}
+			held.borrow_mut().insert(Arc::clone(&self.deps));
+		});
+		self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ())
+	}
+
+	pub fn try_lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
+		let res = self.inner.try_lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ());
+		if res.is_ok() {
+			MUTEXES_HELD.with(|held| {
+				// Since a try-lock will simply fail if the lock is held already, we do not
+				// consider try-locks to ever generate lockorder inversions. However, if a try-lock
+				// succeeds, we do consider it to have created lockorder dependencies.
+				for locked in held.borrow().iter() {
+					self.deps.locked_before.lock().unwrap().insert(Arc::clone(locked));
+				}
+				held.borrow_mut().insert(Arc::clone(&self.deps));
+			});
+		}
+		res
+	}
+}
+
+pub struct RwLock<T: ?Sized> {
+	inner: StdRwLock<T>
+}
+
+pub struct RwLockReadGuard<'a, T: ?Sized + 'a> {
+	lock: StdRwLockReadGuard<'a, T>,
+}
+
+pub struct RwLockWriteGuard<'a, T: ?Sized + 'a> {
+	lock: StdRwLockWriteGuard<'a, T>,
+}
+
+impl<T: ?Sized> Deref for RwLockReadGuard<'_, T> {
+	type Target = T;
+
+	fn deref(&self) -> &T {
+		&self.lock.deref()
+	}
+}
+
+impl<T: ?Sized> Deref for RwLockWriteGuard<'_, T> {
+	type Target = T;
+
+	fn deref(&self) -> &T {
+		&self.lock.deref()
+	}
+}
+
+impl<T: ?Sized> DerefMut for RwLockWriteGuard<'_, T> {
+	fn deref_mut(&mut self) -> &mut T {
+		self.lock.deref_mut()
+	}
+}
+
+impl<T> RwLock<T> {
+	pub fn new(inner: T) -> RwLock<T> {
+		RwLock { inner: StdRwLock::new(inner) }
+	}
+
+	pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
+		self.inner.read().map(|lock| RwLockReadGuard { lock }).map_err(|_| ())
+	}
+
+	pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
+		self.inner.write().map(|lock| RwLockWriteGuard { lock }).map_err(|_| ())
+	}
+
+	pub fn try_write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
+		self.inner.try_write().map(|lock| RwLockWriteGuard { lock }).map_err(|_| ())
+	}
+}
diff --git a/lightning/src/lib.rs b/lightning/src/lib.rs
index 3338803f9a0..ca18d7bb5cc 100644
--- a/lightning/src/lib.rs
+++ b/lightning/src/lib.rs
@@ -143,8 +143,16 @@ mod prelude {
 	pub use alloc::string::ToString;
 }
 
+#[cfg(all(feature = "std", test))]
+mod debug_sync;
+#[cfg(all(feature = "backtrace", feature = "std", test))]
+extern crate backtrace;
+
 #[cfg(feature = "std")]
 mod sync {
+	#[cfg(test)]
+	pub use debug_sync::*;
+	#[cfg(not(test))]
 	pub use ::std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard};
 }
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index e9af5b18681..77e9152d4c1 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -3931,12 +3931,12 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	}
 
 	fn finalize_claims(&self, mut sources: Vec<HTLCSource>) {
+		let mut outbounds = self.pending_outbound_payments.lock().unwrap();
 		let mut pending_events = self.pending_events.lock().unwrap();
 		for source in sources.drain(..) {
 			if let HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } = source {
 				let mut session_priv_bytes = [0; 32];
 				session_priv_bytes.copy_from_slice(&session_priv[..]);
-				let mut outbounds = self.pending_outbound_payments.lock().unwrap();
 				if let hash_map::Entry::Occupied(mut payment) = outbounds.entry(payment_id) {
 					assert!(payment.get().is_fulfilled());
 					if payment.get_mut().remove(&session_priv_bytes, None) {
@@ -5321,8 +5321,8 @@ where
 			inbound_payment.expiry_time > header.time as u64
 		});
 
-		let mut pending_events = self.pending_events.lock().unwrap();
 		let mut outbounds = self.pending_outbound_payments.lock().unwrap();
+		let mut pending_events = self.pending_events.lock().unwrap();
 		outbounds.retain(|payment_id, payment| {
 			if payment.remaining_parts() != 0 { return true }
 			if let PendingOutboundPayment::Retryable { starting_block_height, payment_hash, .. } = payment {
@@ -6151,6 +6151,8 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> Writeable f
 			peer_state.latest_features.write(writer)?;
 		}
 
+		let pending_inbound_payments = self.pending_inbound_payments.lock().unwrap();
+		let pending_outbound_payments = self.pending_outbound_payments.lock().unwrap();
 		let events = self.pending_events.lock().unwrap();
 		(events.len() as u64).write(writer)?;
 		for event in events.iter() {
@@ -6172,14 +6174,12 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> Writeable f
 		(self.last_node_announcement_serial.load(Ordering::Acquire) as u32).write(writer)?;
 		(self.highest_seen_timestamp.load(Ordering::Acquire) as u32).write(writer)?;
 
-		let pending_inbound_payments = self.pending_inbound_payments.lock().unwrap();
 		(pending_inbound_payments.len() as u64).write(writer)?;
 		for (hash, pending_payment) in pending_inbound_payments.iter() {
 			hash.write(writer)?;
 			pending_payment.write(writer)?;
 		}
 
-		let pending_outbound_payments = self.pending_outbound_payments.lock().unwrap();
 		// For backwards compat, write the session privs and their total length.
 		let mut num_pending_outbounds_compat: u64 = 0;
 		for (_, outbound) in pending_outbound_payments.iter() {
diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs
index fe67188f280..0993caf4c1d 100644
--- a/lightning/src/ln/functional_tests.rs
+++ b/lightning/src/ln/functional_tests.rs
@@ -2481,7 +2481,7 @@ fn revoked_output_claim() {
 	mine_transaction(&nodes[1], &revoked_local_txn[0]);
 	check_added_monitors!(nodes[1], 1);
 	check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
-	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	assert_eq!(node_txn.len(), 2); // ChannelMonitor: justice tx against revoked to_local output, ChannelManager: local commitment tx
 
 	check_spends!(node_txn[0], revoked_local_txn[0]);
@@ -4645,7 +4645,7 @@ fn test_claim_sizeable_push_msat() {
 	check_closed_broadcast!(nodes[1], true);
 	check_added_monitors!(nodes[1], 1);
 	check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed);
-	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	assert_eq!(node_txn.len(), 1);
 	check_spends!(node_txn[0], chan.3);
 	assert_eq!(node_txn[0].output.len(), 2); // We can't force trimming of to_remote output as channel_reserve_satoshis block us to do so at channel opening
@@ -4713,7 +4713,7 @@ fn test_claim_on_remote_revoked_sizeable_push_msat() {
 	check_added_monitors!(nodes[1], 1);
 	check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
 
-	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	mine_transaction(&nodes[1], &node_txn[0]);
 	connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1);
 
@@ -4756,7 +4756,7 @@ fn test_static_spendable_outputs_preimage_tx() {
 	}
 
 	// Check B's monitor was able to send back output descriptor event for preimage tx on A's commitment tx
-	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap(); // ChannelManager : 2 (local commitment tx + HTLC-Success), ChannelMonitor: preimage tx
+	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone(); // ChannelManager : 2 (local commitment tx + HTLC-Success), ChannelMonitor: preimage tx
 	assert_eq!(node_txn.len(), 3);
 	check_spends!(node_txn[0], commitment_tx[0]);
 	assert_eq!(node_txn[0].input[0].witness.last().unwrap().len(), OFFERED_HTLC_SCRIPT_WEIGHT);
@@ -4842,7 +4842,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_commitment_tx() {
 	check_added_monitors!(nodes[1], 1);
 	check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
 
-	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	assert_eq!(node_txn.len(), 2);
 	assert_eq!(node_txn[0].input.len(), 2);
 	check_spends!(node_txn[0], revoked_local_txn[0]);
@@ -4895,7 +4895,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_htlc_timeout_tx() {
 	check_added_monitors!(nodes[1], 1);
 	check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
 
-	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	assert_eq!(node_txn.len(), 3); // ChannelMonitor: bogus justice tx, justice tx on revoked outputs, ChannelManager: local commitment tx
 	// The first transaction generated is bogus - it spends both outputs of revoked_local_txn[0]
 	// including the one already spent by revoked_htlc_txn[1]. That's OK, we'll spend with valid
@@ -4951,7 +4951,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_htlc_success_tx() {
 	check_closed_broadcast!(nodes[1], true);
 	check_added_monitors!(nodes[1], 1);
 	check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
-	let revoked_htlc_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let revoked_htlc_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	assert_eq!(revoked_htlc_txn.len(), 2);
 
 	assert_eq!(revoked_htlc_txn[0].input.len(), 1);
@@ -4969,7 +4969,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_htlc_success_tx() {
 	check_added_monitors!(nodes[0], 1);
 	check_closed_event!(nodes[0], 1, ClosureReason::CommitmentTxConfirmed);
 
-	let node_txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let node_txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	assert_eq!(node_txn.len(), 3); // ChannelMonitor: justice tx on revoked commitment, justice tx on revoked HTLC-success, ChannelManager: local commitment tx
 
 	// The first transaction generated is bogus - it spends both outputs of revoked_local_txn[0]
@@ -5110,7 +5110,7 @@ fn test_onchain_to_onchain_claim() {
 	let commitment_tx = get_local_commitment_txn!(nodes[0], chan_1.2);
 	mine_transaction(&nodes[1], &commitment_tx[0]);
 	check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
-	let b_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let b_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	// ChannelMonitor: HTLC-Success tx, ChannelManager: local commitment tx + HTLC-Success tx
 	assert_eq!(b_txn.len(), 3);
 	check_spends!(b_txn[1], chan_1.3);
@@ -8685,7 +8685,7 @@ fn do_test_onchain_htlc_settlement_after_close(broadcast_alice: bool, go_onchain
 	};
 	let header = BlockHeader { version: 0x20000000, prev_blockhash: nodes[1].best_block_hash(), merkle_root: Default::default(), time: 42, bits: 42, nonce: 42};
 	connect_block(&nodes[1], &Block { header, txdata: vec![txn_to_broadcast[0].clone()]});
-	let mut bob_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+	let mut bob_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 	if broadcast_alice {
 		check_closed_broadcast!(nodes[1], true);
 		check_added_monitors!(nodes[1], 1);
diff --git a/lightning/src/ln/shutdown_tests.rs b/lightning/src/ln/shutdown_tests.rs
index b712212ab75..7afc982faab 100644
--- a/lightning/src/ln/shutdown_tests.rs
+++ b/lightning/src/ln/shutdown_tests.rs
@@ -778,7 +778,7 @@ fn do_test_closing_signed_reinit_timeout(timeout_step: TimeoutStep) {
 		nodes[1].node.timer_tick_occurred();
 		nodes[1].node.timer_tick_occurred();
 
-		let txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+		let txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 		assert_eq!(txn.len(), 1);
 		assert_eq!(txn[0].output.len(), 2);
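
A minimal sketch of the lockorder check the new debug_sync::Mutex performs (the test name and values below are illustrative, not part of this diff): locking `a` then `b` records `a` in `b`'s locked-before set, so later taking `a` while `b` is held is caught in `Mutex::lock` and panics, with the mutex-construction backtrace when the `backtrace` feature is enabled.

#[test]
#[should_panic]
fn lockorder_inversion_sketch() {
	// Hypothetical test against the debug_sync primitives introduced above.
	let a = Mutex::new(0u32);
	let b = Mutex::new(0u32);
	{
		let _a = a.lock().unwrap();
		let _b = b.lock().unwrap(); // records that `a` was held when `b` was locked
	}
	{
		let _b = b.lock().unwrap();
		let _a = a.lock().unwrap(); // inversion: `a` is in `b`'s locked-before set, so this panics
	}
}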