Skip to content

Commit

Permalink
feat: Add GroupsAccumulator and GroupsAccumulatorFlatAdapter, extendi…
Browse files Browse the repository at this point in the history
…ng API (#174)

This is based on the upstream GroupsAccumulator and
GroupsAccumulatorAdapter, but extends the API so that the existing hash
aggregation code works with it. For now, we do not really use the upstream
interface.

We still use the basic Accumulator for types that do not implement
GroupsAccumulator, and the hash aggregation code handles this poorly.
  • Loading branch information
srh authored Nov 7, 2024
1 parent b3acc9f commit a8f045a
Show file tree
Hide file tree
Showing 8 changed files with 1,780 additions and 67 deletions.
4 changes: 2 additions & 2 deletions datafusion/src/cube_ext/joinagg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::execution::context::{ExecutionContextState, ExecutionProps};
use crate::logical_plan::{DFSchemaRef, Expr, LogicalPlan, UserDefinedLogicalNode};
use crate::optimizer::optimizer::OptimizerRule;
use crate::optimizer::utils::from_plan;
use crate::physical_plan::hash_aggregate::{Accumulators, AggregateMode};
use crate::physical_plan::hash_aggregate::{create_accumulation_state, AggregateMode};
use crate::physical_plan::planner::{physical_name, ExtensionPlanner};
use crate::physical_plan::{hash_aggregate, PhysicalPlanner};
use crate::physical_plan::{
Expand Down Expand Up @@ -245,7 +245,7 @@ impl ExecutionPlan for CrossJoinAggExec {
&AggregateMode::Full,
self.group_expr.len(),
)?;
let mut accumulators = Accumulators::new();
let mut accumulators = create_accumulation_state(&self.agg_expr)?;
for partition in 0..self.join.right.output_partitioning().partition_count() {
let mut batches = self.join.right.execute(partition).await?;
while let Some(right) = batches.next().await {
Expand Down
19 changes: 19 additions & 0 deletions datafusion/src/physical_plan/expressions/average.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use std::convert::TryFrom;
use std::sync::Arc;

use crate::error::{DataFusionError, Result};
use crate::physical_plan::groups_accumulator::GroupsAccumulator;
use crate::physical_plan::groups_accumulator_flat_adapter::GroupsAccumulatorFlatAdapter;
use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
use crate::scalar::ScalarValue;
use arrow::compute;
Expand Down Expand Up @@ -112,6 +114,23 @@ impl AggregateExpr for Avg {
)?))
}

/// Reports whether this aggregate supplies a `GroupsAccumulator`.
///
/// Always `true` for AVG; the accumulator itself is built by `create_groups_accumulator`.
fn uses_groups_accumulator(&self) -> bool {
    true
}

/// Builds the groups accumulator used to accumulate values from this AVG expression.
///
/// This implementation always returns `Some`, wrapping a `GroupsAccumulatorFlatAdapter` over
/// `AvgAccumulator`. Per the trait contract, a `None` result would mean that
/// `create_accumulator` must be used instead.
fn create_groups_accumulator(
    &self,
) -> arrow::error::Result<Option<Box<dyn GroupsAccumulator>>> {
    // avg is f64 (as in create_accumulator)
    let make_accumulator = || AvgAccumulator::try_new(&DataType::Float64);
    let adapter = GroupsAccumulatorFlatAdapter::<AvgAccumulator>::new(make_accumulator);
    Ok(Some(Box::new(adapter)))
}

/// Returns the input expressions of this aggregate: a one-element vector holding a
/// shared handle to `self.expr`.
fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
    let input = Arc::clone(&self.expr);
    vec![input]
}
Expand Down
21 changes: 20 additions & 1 deletion datafusion/src/physical_plan/expressions/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use std::convert::TryFrom;
use std::sync::Arc;

use crate::error::{DataFusionError, Result};
use crate::physical_plan::groups_accumulator::GroupsAccumulator;
use crate::physical_plan::groups_accumulator_flat_adapter::GroupsAccumulatorFlatAdapter;
use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr};
use crate::scalar::ScalarValue;
use arrow::compute;
Expand All @@ -42,7 +44,7 @@ use super::format_state_name;
use smallvec::smallvec;
use smallvec::SmallVec;

// SUM aggregate expression
/// SUM aggregate expression
#[derive(Debug)]
pub struct Sum {
name: String,
Expand Down Expand Up @@ -118,6 +120,23 @@ impl AggregateExpr for Sum {
Ok(Box::new(SumAccumulator::try_new(&self.data_type)?))
}

/// Reports whether this aggregate supplies a `GroupsAccumulator`.
///
/// Always `true` for SUM; the accumulator itself is built by `create_groups_accumulator`.
fn uses_groups_accumulator(&self) -> bool {
    true
}

/// Builds the groups accumulator used to accumulate values from this SUM expression.
///
/// This implementation always returns `Some`, wrapping a `GroupsAccumulatorFlatAdapter` over
/// `SumAccumulator`. Per the trait contract, a `None` result would mean that
/// `create_accumulator` must be used instead.
fn create_groups_accumulator(
    &self,
) -> arrow::error::Result<Option<Box<dyn GroupsAccumulator>>> {
    // The factory closure is moved into the adapter, so it needs its own copy of the
    // data type rather than a borrow of `self`.
    let sum_type = self.data_type.clone();
    let make_accumulator = move || SumAccumulator::try_new(&sum_type);
    let adapter = GroupsAccumulatorFlatAdapter::<SumAccumulator>::new(make_accumulator);
    Ok(Some(Box::new(adapter)))
}

/// Returns the name stored on this SUM expression, borrowed from `self`.
fn name(&self) -> &str {
    self.name.as_str()
}
Expand Down
Loading

0 comments on commit a8f045a

Please sign in to comment.