diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index 1c7a4fd4d553..7ad8992ca9ae 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -151,6 +151,15 @@ impl Precision { (_, _) => Precision::Absent, } } + + /// Return the estimate of applying a filter with estimated selectivity + /// `selectivity` to this Precision. A selectivity of `1.0` means that all + /// rows are selected. A selectivity of `0.5` means half the rows are + /// selected. Will always return inexact statistics. + pub fn with_estimated_selectivity(self, selectivity: f64) -> Self { + self.map(|v| ((v as f64 * selectivity).ceil()) as usize) + .to_inexact() + } } impl Precision { diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 597e1d523a24..107c95eff7f1 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -200,15 +200,12 @@ impl ExecutionPlan for FilterExec { // assume filter selects 20% of rows if we cannot do anything smarter // tracking issue for making this configurable: // https://github.com/apache/arrow-datafusion/issues/8133 - let selectivity = 0.2_f32; - let mut stats = input_stats.into_inexact(); - if let Precision::Inexact(n) = stats.num_rows { - stats.num_rows = Precision::Inexact((selectivity * n as f32) as usize); - } - if let Precision::Inexact(n) = stats.total_byte_size { - stats.total_byte_size = - Precision::Inexact((selectivity * n as f32) as usize); - } + let selectivity = 0.2_f64; + let mut stats = input_stats.clone().into_inexact(); + stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity); + stats.total_byte_size = stats + .total_byte_size + .with_estimated_selectivity(selectivity); return Ok(stats); } @@ -222,14 +219,8 @@ impl ExecutionPlan for FilterExec { // Estimate (inexact) selectivity of predicate let selectivity = analysis_ctx.selectivity.unwrap_or(1.0); - let num_rows = match num_rows.get_value() { - Some(nr) => Precision::Inexact((*nr as f64 * selectivity).ceil() as usize), - None => Precision::Absent, - }; - let total_byte_size = match total_byte_size.get_value() { - Some(tbs) => Precision::Inexact((*tbs as f64 * selectivity).ceil() as usize), - None => Precision::Absent, - }; + let num_rows = num_rows.with_estimated_selectivity(selectivity); + let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity); let column_statistics = collect_new_statistics( &input_stats.column_statistics,