diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs index dd36665a9344..9f25a0b2fa47 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -32,7 +32,7 @@ use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::memory::MemoryStream; use datafusion::physical_plan::{ project_schema, DisplayAs, DisplayFormatType, ExecutionPlan, - SendableRecordBatchStream, Statistics, + SendableRecordBatchStream, }; use datafusion::prelude::*; use datafusion_expr::{Expr, LogicalPlanBuilder}; @@ -270,8 +270,4 @@ impl ExecutionPlan for CustomExec { None, )?)) } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 5a1fdcaee509..419f62cff664 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2057,9 +2057,7 @@ mod tests { use super::*; use crate::datasource::file_format::options::CsvReadOptions; use crate::datasource::MemTable; - use crate::physical_plan::{ - expressions, DisplayFormatType, Partitioning, Statistics, - }; + use crate::physical_plan::{expressions, DisplayFormatType, Partitioning}; use crate::physical_plan::{DisplayAs, SendableRecordBatchStream}; use crate::physical_planner::PhysicalPlanner; use crate::prelude::{SessionConfig, SessionContext}; @@ -2670,10 +2668,6 @@ mod tests { ) -> Result { unimplemented!("NoOpExecutionPlan::execute"); } - - fn statistics(&self) -> Result { - unimplemented!("NoOpExecutionPlan::statistics"); - } } // Produces an execution plan where the schema is mismatched from diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index d826ec8bfbb6..4fe022f1769d 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -40,7 +40,7 @@ use crate::prelude::{CsvReadOptions, SessionContext}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_common::{Statistics, TableReference}; +use datafusion_common::TableReference; use datafusion_expr::{CreateExternalTable, Expr, TableType}; use datafusion_physical_expr::PhysicalSortExpr; @@ -238,10 +238,6 @@ impl ExecutionPlan for UnboundedExec { batch: self.batch.clone(), })) } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } } #[derive(Debug)] diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index bce242513559..ded37983bb21 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -25,7 +25,7 @@ use super::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; use super::{DisplayAs, Distribution, SendableRecordBatchStream}; use crate::display::DisplayableExecutionPlan; -use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics}; +use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::{internal_err, DataFusionError, Result}; @@ -195,11 +195,6 @@ impl ExecutionPlan for AnalyzeExec { futures::stream::once(output), ))) } - - fn statistics(&self) -> Result { - // Statistics an an ANALYZE plan are not relevant - Ok(Statistics::new_unknown(&self.schema())) - } } /// Creates the ouput of AnalyzeExec as a RecordBatch diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 81b8f9944110..e4904ddd3410 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use super::expressions::PhysicalSortExpr; use super::{DisplayAs, SendableRecordBatchStream}; use crate::stream::RecordBatchStreamAdapter; -use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics}; +use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::display::StringifiedPlan; @@ -167,11 +167,6 @@ impl ExecutionPlan for ExplainExec { futures::stream::iter(vec![Ok(record_batch)]), ))) } - - fn statistics(&self) -> Result { - // Statistics an EXPLAIN plan are not relevant - Ok(Statistics::new_unknown(&self.schema())) - } } /// If this plan should be shown, given the previous plan that was diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index d1f2706930d2..627d58e13781 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -25,7 +25,6 @@ use std::sync::Arc; use super::expressions::PhysicalSortExpr; use super::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, - Statistics, }; use crate::metrics::MetricsSet; use crate::stream::RecordBatchStreamAdapter; @@ -276,10 +275,6 @@ impl ExecutionPlan for FileSinkExec { stream, ))) } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } } /// Create a output record batch with a count diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index d7987ba95abf..b2f81579f8e8 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -231,8 +231,12 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { None } - /// Returns the global output statistics for this `ExecutionPlan` node. - fn statistics(&self) -> Result; + /// Returns statistics for this `ExecutionPlan` node. If statistics are not + /// available, should return [`Statistics::new_unknown`] (the default), not + /// an error. + fn statistics(&self) -> Result { + Ok(Statistics::new_unknown(&self.schema())) + } } /// Indicate whether a data exchange is needed for the input of `plan`, which will be very helpful diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 7bfa7e2ceefb..27f03b727c29 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -26,7 +26,7 @@ use crate::stream::RecordBatchStreamAdapter; use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; use arrow::datatypes::SchemaRef; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result, Statistics}; +use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; @@ -187,8 +187,4 @@ impl ExecutionPlan for StreamingTableExec { None => stream, }) } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } } diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index f90f4231c620..71e6cba6741e 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -453,10 +453,6 @@ impl ExecutionPlan for ErrorExec { ) -> Result { internal_err!("ErrorExec, unsurprisingly, errored in partition {partition}") } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } } /// A mock execution plan that simply returns the provided statistics @@ -627,10 +623,6 @@ impl ExecutionPlan for BlockingExec { _refs: Arc::clone(&self.refs), })) } - - fn statistics(&self) -> Result { - unimplemented!() - } } /// A [`RecordBatchStream`] that is pending forever. @@ -764,10 +756,6 @@ impl ExecutionPlan for PanicExec { ready: false, })) } - - fn statistics(&self) -> Result { - unimplemented!() - } } /// A [`RecordBatchStream`] that yields every other batch and panics diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index ed64735e5ad6..30f109953cbb 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -25,7 +25,7 @@ use super::DisplayAs; use crate::{ expressions::Column, DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning, PhysicalExpr, PhysicalSortExpr, RecordBatchStream, - SendableRecordBatchStream, Statistics, + SendableRecordBatchStream, }; use arrow::array::{ @@ -159,10 +159,6 @@ impl ExecutionPlan for UnnestExec { unnest_time: 0, })) } - - fn statistics(&self) -> Result { - Ok(Statistics::new_unknown(&self.schema())) - } } /// A stream that issues [RecordBatch]es with unnested column data.