From d00a085d5a5acde73a7667e8c73b5f0077d8a960 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Thu, 7 Nov 2024 19:41:07 -0500 Subject: [PATCH] chore: Move `CoalesceBatches` from core to optimizer crate (#13283) * move coalesce batches * small fix --- datafusion/core/src/physical_optimizer/mod.rs | 2 +- .../src}/coalesce_batches.rs | 20 +++++++++---------- datafusion/physical-optimizer/src/lib.rs | 1 + 3 files changed, 12 insertions(+), 11 deletions(-) rename datafusion/{core/src/physical_optimizer => physical-optimizer/src}/coalesce_batches.rs (88%) diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index fe799a23059f..a9f6f30dc175 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -21,7 +21,7 @@ //! "Repartition" or "Sortedness" //! //! [`ExecutionPlan`]: crate::physical_plan::ExecutionPlan -pub mod coalesce_batches; + pub mod enforce_distribution; pub mod enforce_sorting; pub mod join_selection; diff --git a/datafusion/core/src/physical_optimizer/coalesce_batches.rs b/datafusion/physical-optimizer/src/coalesce_batches.rs similarity index 88% rename from datafusion/core/src/physical_optimizer/coalesce_batches.rs rename to datafusion/physical-optimizer/src/coalesce_batches.rs index 2f834813ede9..5cf2c877c61a 100644 --- a/datafusion/core/src/physical_optimizer/coalesce_batches.rs +++ b/datafusion/physical-optimizer/src/coalesce_batches.rs @@ -18,19 +18,19 @@ //! CoalesceBatches optimizer that groups batches together rows //! in bigger batches to avoid overhead with small batches +use crate::PhysicalOptimizerRule; + use std::sync::Arc; -use crate::{ - config::ConfigOptions, - error::Result, - physical_plan::{ - coalesce_batches::CoalesceBatchesExec, filter::FilterExec, joins::HashJoinExec, - repartition::RepartitionExec, Partitioning, - }, +use datafusion_common::config::ConfigOptions; +use datafusion_common::error::Result; +use datafusion_physical_expr::Partitioning; +use datafusion_physical_plan::{ + coalesce_batches::CoalesceBatchesExec, filter::FilterExec, joins::HashJoinExec, + repartition::RepartitionExec, ExecutionPlan, }; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_physical_optimizer::PhysicalOptimizerRule; /// Optimizer rule that introduces CoalesceBatchesExec to avoid overhead with small batches that /// are produced by highly selective filters @@ -46,9 +46,9 @@ impl CoalesceBatches { impl PhysicalOptimizerRule for CoalesceBatches { fn optimize( &self, - plan: Arc, + plan: Arc, config: &ConfigOptions, - ) -> Result> { + ) -> Result> { if !config.execution.coalesce_batches { return Ok(plan); } diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index 9a4e4f85245a..5d0ccde9f8cd 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -19,6 +19,7 @@ #![deny(clippy::clone_on_ref_ptr)] pub mod aggregate_statistics; +pub mod coalesce_batches; pub mod combine_partial_final_agg; pub mod limit_pushdown; pub mod limited_distinct_aggregation;