Skip to content

Commit

Permalink
[FEAT] Add concat to new execution model + buffered intermediate ops (#…
Browse files Browse the repository at this point in the history
…2519)

This PR adds Concat and buffered intermediate ops to the new execution
model, in addition to some refactors to support this change.

- There are now two types of sinks, single input (limit, agg) and double
input (concat, join).
- Intermediate ops can now buffer their outputs via OperatorTaskState.
- Add a channel abstraction for in-order vs out-of-order channels. The
in-order channel uses a round robin implementation over a vec of
channels with capacity 1, while the out-of-order channel just uses a
single MPSC channel with capacity `n`.
- Removed Pipelines and opt for Actors. Each actor has control over a
single source / op / sink, can control it's own parallelism, and can
also configure it's own input channel.

---------

Co-authored-by: Colin Ho <[email protected]>
  • Loading branch information
colin-ho and Colin Ho authored Jul 17, 2024
1 parent b4446b0 commit eb040ce
Show file tree
Hide file tree
Showing 19 changed files with 686 additions and 400 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion src/daft-local-execution/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[dependencies]
async-trait = {workspace = true}
common-error = {path = "../common/error", default-features = false}
daft-core = {path = "../daft-core", default-features = false}
daft-dsl = {path = "../daft-dsl", default-features = false}
Expand Down
157 changes: 157 additions & 0 deletions src/daft-local-execution/src/channel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
use std::sync::Arc;

use common_error::DaftResult;
use daft_micropartition::MicroPartition;

pub type SingleSender = tokio::sync::mpsc::Sender<DaftResult<Arc<MicroPartition>>>;
pub type SingleReceiver = tokio::sync::mpsc::Receiver<DaftResult<Arc<MicroPartition>>>;

pub fn spawn_compute_task<F>(future: F)
where
F: std::future::Future<Output = DaftResult<()>> + Send + 'static,
{
tokio::spawn(async move {
let _ = future.await;
});
}

pub fn create_single_channel(buffer_size: usize) -> (SingleSender, SingleReceiver) {
tokio::sync::mpsc::channel(buffer_size)
}

pub fn create_channel(buffer_size: usize, in_order: bool) -> (MultiSender, MultiReceiver) {
if in_order {
let (senders, receivers) = (0..buffer_size).map(|_| create_single_channel(1)).unzip();
let sender = MultiSender::InOrder(InOrderSender::new(senders));
let receiver = MultiReceiver::InOrder(InOrderReceiver::new(receivers));
(sender, receiver)
} else {
let (sender, receiver) = create_single_channel(buffer_size);
let sender = MultiSender::OutOfOrder(OutOfOrderSender::new(sender));
let receiver = MultiReceiver::OutOfOrder(OutOfOrderReceiver::new(receiver));
(sender, receiver)
}
}

pub enum MultiSender {
InOrder(InOrderSender),
OutOfOrder(OutOfOrderSender),
}

impl MultiSender {
pub fn get_next_sender(&mut self) -> SingleSender {
match self {
Self::InOrder(sender) => sender.get_next_sender(),
Self::OutOfOrder(sender) => sender.get_sender(),
}
}

pub fn buffer_size(&self) -> usize {
match self {
Self::InOrder(sender) => sender.senders.len(),
Self::OutOfOrder(sender) => sender.sender.max_capacity(),
}
}

pub fn in_order(&self) -> bool {
match self {
Self::InOrder(_) => true,
Self::OutOfOrder(_) => false,
}
}
}
pub struct InOrderSender {
senders: Vec<SingleSender>,
curr_sender_idx: usize,
}

impl InOrderSender {
pub fn new(senders: Vec<SingleSender>) -> Self {
Self {
senders,
curr_sender_idx: 0,
}
}

pub fn get_next_sender(&mut self) -> SingleSender {
let next_idx = self.curr_sender_idx;
self.curr_sender_idx = (next_idx + 1) % self.senders.len();
self.senders[next_idx].clone()
}
}

pub struct OutOfOrderSender {
sender: SingleSender,
}

impl OutOfOrderSender {
pub fn new(sender: SingleSender) -> Self {
Self { sender }
}

pub fn get_sender(&self) -> SingleSender {
self.sender.clone()
}
}

pub enum MultiReceiver {
InOrder(InOrderReceiver),
OutOfOrder(OutOfOrderReceiver),
}

impl MultiReceiver {
pub async fn recv(&mut self) -> Option<DaftResult<Arc<MicroPartition>>> {
match self {
Self::InOrder(receiver) => receiver.recv().await,
Self::OutOfOrder(receiver) => receiver.recv().await,
}
}
}

pub struct InOrderReceiver {
receivers: Vec<SingleReceiver>,
curr_receiver_idx: usize,
is_done: bool,
}

impl InOrderReceiver {
pub fn new(receivers: Vec<SingleReceiver>) -> Self {
Self {
receivers,
curr_receiver_idx: 0,
is_done: false,
}
}

pub async fn recv(&mut self) -> Option<DaftResult<Arc<MicroPartition>>> {
if self.is_done {
return None;
}
for i in 0..self.receivers.len() {
let next_idx = (i + self.curr_receiver_idx) % self.receivers.len();
if let Some(val) = self.receivers[next_idx].recv().await {
self.curr_receiver_idx = (next_idx + 1) % self.receivers.len();
return Some(val);
}
}
self.is_done = true;
None
}
}

pub struct OutOfOrderReceiver {
receiver: SingleReceiver,
}

impl OutOfOrderReceiver {
pub fn new(receiver: SingleReceiver) -> Self {
Self { receiver }
}

pub async fn recv(&mut self) -> Option<DaftResult<Arc<MicroPartition>>> {
if let Some(val) = self.receiver.recv().await {
return Some(val);
}
None
}
}
93 changes: 0 additions & 93 deletions src/daft-local-execution/src/create_pipeline.rs

This file was deleted.

Loading

0 comments on commit eb040ce

Please sign in to comment.