diff --git a/.changesets/maint_lrlna_remove_catch_unwind.md b/.changesets/maint_lrlna_remove_catch_unwind.md new file mode 100644 index 00000000000..3c4472e6d74 --- /dev/null +++ b/.changesets/maint_lrlna_remove_catch_unwind.md @@ -0,0 +1,15 @@ + +### Remove catch_unwind wrapper around the native query planner ([PR #6396](https://github.com/apollographql/router/pull/6396)) + +As part of internal maintenance of the query planner, we are removing the +catch_unwind wrapper around the native query planner. This wrapper was used as +an extra safeguard for potential panics the native planner could produce. The +native query planner no longer has any code paths that could panic. We have also +not witnessed a panic in the last four months, having processed 560 million real +user operations through the native planner. + +This maintenance work also removes backtrace capture for federation errors which +was used for debugging and is no longer necessary as we have the confidence in +the native planner's implementation. + +By [@lrlna](https://github.com/lrlna) in https://github.com/apollographql/router/pull/6396 diff --git a/apollo-router/src/executable.rs b/apollo-router/src/executable.rs index a67e294a7f2..2dce46b1283 100644 --- a/apollo-router/src/executable.rs +++ b/apollo-router/src/executable.rs @@ -1,6 +1,5 @@ //! Main entry point for CLI command to start server. -use std::cell::Cell; use std::env; use std::fmt::Debug; use std::net::SocketAddr; @@ -751,20 +750,10 @@ fn setup_panic_handler() { } else { tracing::error!("{}", e) } - if !USING_CATCH_UNWIND.get() { - // Once we've panic'ed the behaviour of the router is non-deterministic - // We've logged out the panic details. Terminate with an error code - std::process::exit(1); - } + std::process::exit(1); })); } -// TODO: once the Rust query planner does not use `todo!()` anymore, -// remove this and the use of `catch_unwind` to call it. -thread_local! { - pub(crate) static USING_CATCH_UNWIND: Cell = const { Cell::new(false) }; -} - static COPIED: AtomicBool = AtomicBool::new(false); fn copy_args_to_env() { diff --git a/apollo-router/src/query_planner/dual_query_planner.rs b/apollo-router/src/query_planner/dual_query_planner.rs index 9e5250a447b..831bdebe452 100644 --- a/apollo-router/src/query_planner/dual_query_planner.rs +++ b/apollo-router/src/query_planner/dual_query_planner.rs @@ -15,6 +15,7 @@ use apollo_compiler::validation::Valid; use apollo_compiler::ExecutableDocument; use apollo_compiler::Name; use apollo_compiler::Node; +use apollo_federation::error::FederationError; use apollo_federation::query_plan::query_planner::QueryPlanOptions; use apollo_federation::query_plan::query_planner::QueryPlanner; use apollo_federation::query_plan::QueryPlan; @@ -24,7 +25,6 @@ use super::fetch::SubgraphOperation; use super::subscription::SubscriptionNode; use super::FlattenNode; use crate::error::format_bridge_errors; -use crate::executable::USING_CATCH_UNWIND; use crate::query_planner::bridge_query_planner::metric_query_planning_plan_duration; use crate::query_planner::bridge_query_planner::JS_QP_MODE; use crate::query_planner::bridge_query_planner::RUST_QP_MODE; @@ -80,47 +80,23 @@ impl BothModeComparisonJob { } fn execute(self) { - // TODO: once the Rust query planner does not use `todo!()` anymore, - // remove `USING_CATCH_UNWIND` and this use of `catch_unwind`. - let rust_result = std::panic::catch_unwind(|| { - let name = self - .operation_name - .clone() - .map(Name::try_from) - .transpose()?; - USING_CATCH_UNWIND.set(true); - - let start = Instant::now(); - - // No question mark operator or macro from here … - let result = + let start = Instant::now(); + + let rust_result = self + .operation_name + .as_deref() + .map(|n| Name::new(n).map_err(FederationError::from)) + .transpose() + .and_then(|operation| { self.rust_planner - .build_query_plan(&self.document, name, self.plan_options); - - let elapsed = start.elapsed().as_secs_f64(); - metric_query_planning_plan_duration(RUST_QP_MODE, elapsed); - - metric_query_planning_plan_both_comparison_duration(RUST_QP_MODE, elapsed); - metric_query_planning_plan_both_comparison_duration(JS_QP_MODE, self.js_duration); - - // … to here, so the thread can only eiher reach here or panic. - // We unset USING_CATCH_UNWIND in both cases. - USING_CATCH_UNWIND.set(false); - result - }) - .unwrap_or_else(|panic| { - USING_CATCH_UNWIND.set(false); - Err(apollo_federation::error::FederationError::internal( - format!( - "query planner panicked: {}", - panic - .downcast_ref::() - .map(|s| s.as_str()) - .or_else(|| panic.downcast_ref::<&str>().copied()) - .unwrap_or_default() - ), - )) - }); + .build_query_plan(&self.document, operation, self.plan_options) + }); + + let elapsed = start.elapsed().as_secs_f64(); + metric_query_planning_plan_duration(RUST_QP_MODE, elapsed); + + metric_query_planning_plan_both_comparison_duration(RUST_QP_MODE, elapsed); + metric_query_planning_plan_both_comparison_duration(JS_QP_MODE, self.js_duration); let name = self.operation_name.as_deref(); let operation_desc = if let Ok(operation) = self.document.operations.get(name) {