Skip to content

Commit

Permalink
dice: Remove some logging
Browse files Browse the repository at this point in the history
Summary:
The stack around D42099161 introduced the ability to dynamically set the log level via a debug-command. This requires using `tracing_subscriber::reload::Layer`. The implementation of that machinery is backed by a `RwLock` [here](https://fburl.com/code/4xv0ihpn). This `RwLock` is read locked once on every single `#[instrumentation]` call to check whether the instrumentation is active or not.

On top of that, something in our fbsource third-party config enables the `parking_lot` feature of `tracing_subscriber`. This means that this is a `parking_lot::RwLock`, not a `std::sync::RwLock`, which is bad because it's well known that parking lot has problems with excessive spinning.

What all that adds up to is that when you put `#[instrument]` on a super hot function in dice, you get dozens of threads that are all simultaneously doing this:

```
  thread #23, name = 'buck2-rt', stop reason = signal SIGSTOP
    frame #0: 0x000000000c94a1fa buck2`<parking_lot::raw_rwlock::RawRwLock>::lock_shared_slow + 122
    frame #1: 0x0000000007a80b54 buck2`<tracing_subscriber::layer::layered::Layered<tracing_subscriber::reload::Layer<tracing_subscriber::filter::layer_filters::Filtered<tracing_subscriber::fmt::fmt_layer::Layer<tracing_subscriber::registry::sharded::Registry, tracing_subscriber::fmt::format::DefaultFields, tracing_subscriber::fmt::format::Format, std::io::stdio::stderr>, tracing_subscriber::filter::env::EnvFilter, tracing_subscriber::registry::sharded::Registry>, tracing_subscriber::registry::sharded::Registry>, tracing_subscriber::registry::sharded::Registry> as tracing_core::subscriber::Subscriber>::enabled + 292
    frame #2: 0x000000000a172d77 buck2`<dice::legacy::incremental::dep_trackers::internals::Dep<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>> as dice::legacy::incremental::graph::dependencies::Dependency>::recompute::{closure#0} + 183
    frame #3: 0x000000000a03606c buck2`<futures_util::stream::futures_unordered::FuturesUnordered<core::pin::Pin<alloc::boxed::Box<dyn core::future::future::Future<Output = core::result::Result<(alloc::boxed::Box<dyn dice::legacy::incremental::graph::dependencies::ComputedDependency>, alloc::sync::Arc<dyn dice::legacy::incremental::graph::GraphNodeDyn>), dice::api::error::DiceError>> + core::marker::Send>>> as futures_util::stream::stream::StreamExt>::poll_next_unpin + 444
    frame #4: 0x0000000009fc4755 buck2`<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::compute_whether_dependencies_changed::{closure#0} (.llvm.4229350879637282184) + 1733
    frame #5: 0x0000000009ef30d4 buck2`<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::compute_whether_versioned_dependencies_changed::{closure#0}::{closure#0} + 228
    frame #6: 0x0000000009f194ec buck2`<buck2_futures::cancellable_future::CancellableFuture<<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::new_dice_task::{closure#0}> as core::future::future::Future>::poll (.llvm.11181184606289051452) + 3500
    frame #7: 0x0000000009f04bbf buck2`<futures_util::future::future::map::Map<buck2_futures::cancellable_future::CancellableFuture<<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::new_dice_task::{closure#0}>, buck2_futures::spawn::spawn_inner<<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::new_dice_task::{closure#0}, dice::api::user_data::UserComputationData, futures_util::future::ready::Ready<()>>::{closure#0}> as core::future::future::Future>::poll + 31
    frame #8: 0x0000000009ff0339 buck2`<futures_util::future::future::flatten::Flatten<futures_util::future::future::Map<futures_util::future::ready::Ready<()>, buck2_futures::spawn::spawn_inner<<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::new_dice_task::{closure#0}, dice::api::user_data::UserComputationData, futures_util::future::ready::Ready<()>>::{closure#1}>, <futures_util::future::future::Map<futures_util::future::ready::Ready<()>, buck2_futures::spawn::spawn_inner<<dice::legacy::incremental::IncrementalEngine<dice::legacy::key::StoragePropertiesForKey<buck2_build_api::actions::calculation::BuildKey>>>::new_dice_task::{closure#0}, dice::api::user_data::UserComputationData, futures_util::future::ready::Ready<()>>::{closure#1}> as core::future::future::Future>::Output> as core::future::future::Future>::poll + 201
    frame #9: 0x00000000093f9f22 buck2`<tokio::task::task_local::TaskLocalFuture<buck2_events::dispatch::EventDispatcher, core::pin::Pin<alloc::boxed::Box<dyn core::future::future::Future<Output = alloc::boxed::Box<dyn core::any::Any + core::marker::Send>> + core::marker::Send>>> as core::future::future::Future>::poll + 130
    frame #10: 0x000000000939fdcb buck2`<tracing::instrument::Instrumented<<buck2_build_api::spawner::BuckSpawner as buck2_futures::spawner::Spawner<dice::api::user_data::UserComputationData>>::spawn::{closure#0}> as core::future::future::Future>::poll + 139
    frame #11: 0x00000000091ca5a9 buck2`<tokio::runtime::task::core::Core<tracing::instrument::Instrumented<<buck2_build_api::spawner::BuckSpawner as buck2_futures::spawner::Spawner<dice::api::user_data::UserComputationData>>::spawn::{closure#0}>, alloc::sync::Arc<tokio::runtime::scheduler::multi_thread::handle::Handle>>>::poll + 169
    frame #12: 0x00000000091c1b22 buck2`<tokio::runtime::task::harness::Harness<tracing::instrument::Instrumented<<buck2_build_api::spawner::BuckSpawner as buck2_futures::spawner::Spawner<dice::api::user_data::UserComputationData>>::spawn::{closure#0}>, alloc::sync::Arc<tokio::runtime::scheduler::multi_thread::handle::Handle>>>::poll + 146
    frame #13: 0x000000000c920f6f buck2`<tokio::runtime::scheduler::multi_thread::worker::Context>::run_task + 895
    frame #14: 0x000000000c91f847 buck2`<tokio::runtime::scheduler::multi_thread::worker::Context>::run + 2103
    frame #15: 0x000000000c932264 buck2`<tokio::runtime::context::scoped::Scoped<tokio::runtime::scheduler::Context>>::set::<tokio::runtime::scheduler::multi_thread::worker::run::{closure#0}::{closure#0}, ()> + 52
    frame #16: 0x000000000c932169 buck2`tokio::runtime::context::runtime::enter_runtime::<tokio::runtime::scheduler::multi_thread::worker::run::{closure#0}, ()> + 441
    frame #17: 0x000000000c91efa6 buck2`tokio::runtime::scheduler::multi_thread::worker::run + 70
    frame #18: 0x000000000c906a50 buck2`<tracing::instrument::Instrumented<tokio::runtime::blocking::task::BlockingTask<<tokio::runtime::scheduler::multi_thread::worker::Launch>::launch::{closure#0}>> as core::future::future::Future>::poll + 160
    frame #19: 0x000000000c8f8af9 buck2`<tokio::loom::std::unsafe_cell::UnsafeCell<tokio::runtime::task::core::Stage<tracing::instrument::Instrumented<tokio::runtime::blocking::task::BlockingTask<<tokio::runtime::scheduler::multi_thread::worker::Launch>::launch::{closure#0}>>>>>::with_mut::<core::task::poll::Poll<()>, <tokio::runtime::task::core::Core<tracing::instrument::Instrumented<tokio::runtime::blocking::task::BlockingTask<<tokio::runtime::scheduler::multi_thread::worker::Launch>::launch::{closure#0}>>, tokio::runtime::blocking::schedule::BlockingSchedule>>::poll::{closure#0}> + 153
    frame #20: 0x000000000c90166b buck2`<tokio::runtime::task::core::Core<tracing::instrument::Instrumented<tokio::runtime::blocking::task::BlockingTask<<tokio::runtime::scheduler::multi_thread::worker::Launch>::launch::{closure#0}>>, tokio::runtime::blocking::schedule::BlockingSchedule>>::poll + 43
    frame #21: 0x000000000c90d9b8 buck2`<tokio::runtime::task::harness::Harness<tracing::instrument::Instrumented<tokio::runtime::blocking::task::BlockingTask<<tokio::runtime::scheduler::multi_thread::worker::Launch>::launch::{closure#0}>>, tokio::runtime::blocking::schedule::BlockingSchedule>>::poll + 152
    frame #22: 0x000000000c90b848 buck2`<tokio::runtime::blocking::pool::Inner>::run + 216
    frame #23: 0x000000000c9002ab buck2`std::sys_common::backtrace::__rust_begin_short_backtrace::<<tokio::runtime::blocking::pool::Spawner>::spawn_thread::{closure#0}, ()> + 187
    frame #24: 0x000000000c90042e buck2`<<std::thread::Builder>::spawn_unchecked_<<tokio::runtime::blocking::pool::Spawner>::spawn_thread::{closure#0}, ()>::{closure#1} as core::ops::function::FnOnce<()>>::call_once::{shim:vtable#0} + 158
    frame #25: 0x000000000757e4b5 buck2`std::sys::unix::thread::Thread::new::thread_start::h618b0b8e7bda0b2b + 37
    frame #26: 0x00007f2ba1e9abaf
```

This hit an open source user super hard over in #555 .

This diff approaches this issue by putting all the `#[instrument]`s in dice behind `#[cfg(debug_assertions)]`. This allows them to still be used in debug mode, but keeps them out of any hot paths. I do not have numbers to show that most of these matter. The only one I know matters for sure is `recompute`.

Alternatives are to either try and tailor this to the callsites we know are hot (that sounds error prone) or to revert the stack that inadvertently introduced the RwLock. Even if we did that though, it's probably still safer to keep `#[instrument]` out of super hot paths like this.

Reviewed By: stepancheg

Differential Revision: D53744041

fbshipit-source-id: 85bce9af2fec8ad1d50adc241d3b8e2bfc5cec87
  • Loading branch information
JakobDegen authored and facebook-github-bot committed Feb 15, 2024
1 parent d824120 commit 18654c7
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 17 deletions.
6 changes: 3 additions & 3 deletions dice/dice/src/impls/core/graph/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ impl VersionedGraph {
/// is only updated if the version of the new value is of a newer
/// version than what is stored.
/// Returns the new entry, and an optional old entry that was invalidated due to this update
#[instrument(level = "debug", skip(self, value, storage_type, deps, reusable), fields(key = ?key))]
#[cfg_attr(debug_assertions, instrument(level = "debug", skip(self, value, storage_type, deps, reusable), fields(key = ?key)))]
pub(crate) fn update(
&mut self,
key: VersionedGraphKey,
Expand Down Expand Up @@ -276,7 +276,7 @@ impl VersionedGraph {
}
}

#[instrument(level = "debug", skip(self, value, deps), fields(key = ?key, v = %v, first_dep_dirtied = ?first_dep_dirtied, latest_dep_verified = ?latest_dep_verified))]
#[cfg_attr(debug_assertions, instrument(level = "debug", skip(self, value, deps), fields(key = ?key, v = %v, first_dep_dirtied = ?first_dep_dirtied, latest_dep_verified = ?latest_dep_verified)))]
fn update_empty(
&mut self,
key: DiceKey,
Expand Down Expand Up @@ -305,7 +305,7 @@ impl VersionedGraph {
}

/// Returns the newly updated value for the key, and whether or not any state changed.
#[instrument(level = "debug", skip(self, value, deps, num_to_keep, reusable), fields(key = ?key, key_of_e = %key_of_e, first_dep_dirtied = ?first_dep_dirtied, latest_dep_verified = ?latest_dep_verified))]
#[cfg_attr(debug_assertions, instrument(level = "debug", skip(self, value, deps, num_to_keep, reusable), fields(key = ?key, key_of_e = %key_of_e, first_dep_dirtied = ?first_dep_dirtied, latest_dep_verified = ?latest_dep_verified)))]
fn update_entry(
&mut self,
key_of_e: VersionNumber,
Expand Down
2 changes: 1 addition & 1 deletion dice/dice/src/impls/core/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl StateProcessor {
debug!("Processor terminated");
}

#[instrument(skip_all, fields(kind = %message.variant_name()))]
#[cfg_attr(debug_assertions, instrument(skip_all, fields(kind = %message.variant_name())))]
fn iteration(&mut self, message: StateRequest) {
match message {
StateRequest::UpdateState { changes, resp } => {
Expand Down
8 changes: 4 additions & 4 deletions dice/dice/src/impls/incremental/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@ impl IncrementalEngine {
)
}

#[instrument(
#[cfg_attr(debug_assertions, instrument(
level = "debug",
skip(state, promise, eval, event_dispatcher),
fields(k = ?k, version = %v),
)]
))]
pub(crate) fn project_for_key(
state: CoreStateHandle,
promise: DicePromise,
Expand Down Expand Up @@ -289,11 +289,11 @@ impl IncrementalEngine {

/// determines if the given 'Dependency' has changed between versions 'last_version' and
/// 'target_version'
#[instrument(
#[cfg_attr(debug_assertions, instrument(
level = "debug",
skip(self, eval, deps, check_deps_state),
fields(version = %eval.per_live_version_ctx.get_version(), verified_versions = %verified_versions)
)]
))]
async fn compute_whether_dependencies_changed(
&self,
parent_key: ParentKey,
Expand Down
2 changes: 1 addition & 1 deletion dice/dice/src/legacy/incremental/dep_trackers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ mod internals {
where
K: IncrementalComputeProperties,
{
#[instrument(level = "info", skip(self, transaction_ctx, extra), fields(k = %self.k, version = %transaction_ctx.get_version()))]
#[cfg_attr(debug_assertions, instrument(level = "info", skip(self, transaction_ctx, extra), fields(k = %self.k, version = %transaction_ctx.get_version())))]
async fn recompute(
&self,
transaction_ctx: &Arc<TransactionCtx>,
Expand Down
16 changes: 8 additions & 8 deletions dice/dice/src/legacy/incremental/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ where
}

/// Dirties the value at K
#[instrument(level = "info", skip(self), fields(k = %k, version = %version))]
#[cfg_attr(debug_assertions, instrument(level = "info", skip(self), fields(k = %k, version = %version)))]
pub(crate) fn dirty(&self, k: K::Key, version: VersionNumber, force_dirty: bool) {
// It is crucial that we dirty first before updating the rdeps.
// This is related to the race condition where we invalidate while nodes are being inserted
Expand Down Expand Up @@ -269,7 +269,7 @@ where
}

/// Updates the value at K. Returns whether this injected value actually causes a change
#[instrument(level = "info", skip(self, res, ), fields(k = %k, version = %version))]
#[cfg_attr(debug_assertions, instrument(level = "info", skip(self, res, ), fields(k = %k, version = %version)))]
pub(crate) fn update_injected_value(
self: &Arc<Self>,
k: K::Key,
Expand Down Expand Up @@ -355,11 +355,11 @@ where
}
}

#[instrument(
#[cfg_attr(debug_assertions, instrument(
level = "debug",
skip(self, transaction_ctx, extra, cancelled_instance),
fields(k = %k),
)]
))]
fn new_dice_task(
self: Arc<IncrementalEngine<K>>,
k: K::Key,
Expand Down Expand Up @@ -539,11 +539,11 @@ where
(task, fut)
}

#[instrument(
#[cfg_attr(debug_assertions, instrument(
level = "debug",
skip(self, transaction_ctx, extra, cancellation),
fields(k = %k, version = %transaction_ctx.get_version()),
)]
))]
async fn compute(
self: &Arc<Self>,
k: &K::Key,
Expand Down Expand Up @@ -630,11 +630,11 @@ impl<P: ProjectionKey> IncrementalEngine<ProjectionKeyProperties<P>> {
}

/// Synchronously evaluate projection key without recording dependencies.
#[instrument(
#[cfg_attr(debug_assertions, instrument(
level = "debug",
skip(self, transaction_ctx, extra, derive_from),
fields(k = %k, v = %transaction_ctx.get_version()),
)]
))]
fn eval_projection_versioned(
self: &Arc<Self>,
k: &ProjectionKeyAsKey<P>,
Expand Down

0 comments on commit 18654c7

Please sign in to comment.