-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement skeleton logic with proper module visibility
- Loading branch information
Jay Chia
committed
Dec 20, 2024
1 parent
9b0c1df
commit 511d996
Showing
5 changed files
with
150 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
src/daft-scan/src/scan_task_iters/split_parquet/fetch_parquet_metadata.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
use common_daft_config::DaftExecutionConfig; | ||
use common_error::DaftResult; | ||
|
||
use super::{split_parquet_decision, split_parquet_file}; | ||
use crate::ScanTaskRef; | ||
|
||
pub(super) struct RetrieveParquetMetadataIterator<'cfg> { | ||
decider: split_parquet_decision::DecideSplitIterator<'cfg>, | ||
_cfg: &'cfg DaftExecutionConfig, | ||
} | ||
|
||
impl<'cfg> RetrieveParquetMetadataIterator<'cfg> { | ||
pub(super) fn new( | ||
decider: split_parquet_decision::DecideSplitIterator<'cfg>, | ||
cfg: &'cfg DaftExecutionConfig, | ||
) -> Self { | ||
Self { decider, _cfg: cfg } | ||
} | ||
} | ||
|
||
pub(super) enum ParquetSplitScanTaskGenerator { | ||
_NoSplit(std::iter::Once<DaftResult<ScanTaskRef>>), | ||
_Split(split_parquet_file::ParquetFileSplitter), | ||
} | ||
|
||
impl<'cfg> Iterator for RetrieveParquetMetadataIterator<'cfg> { | ||
type Item = ParquetSplitScanTaskGenerator; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
if let Some(_decision) = self.decider.next() { | ||
todo!("Implement windowed metadata fetching and yielding of ParquetSplitScanTaskGenerator"); | ||
} | ||
None | ||
} | ||
} | ||
|
||
impl Iterator for ParquetSplitScanTaskGenerator { | ||
type Item = DaftResult<ScanTaskRef>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
match self { | ||
Self::_NoSplit(iter) => iter.next(), | ||
Self::_Split(iter) => iter.next(), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
use std::iter::Flatten; | ||
|
||
use common_daft_config::DaftExecutionConfig; | ||
use common_error::DaftResult; | ||
|
||
use super::BoxScanTaskIter; | ||
use crate::ScanTaskRef; | ||
|
||
mod fetch_parquet_metadata; | ||
mod split_parquet_decision; | ||
mod split_parquet_file; | ||
|
||
pub struct SplitParquetScanTasks<'cfg> { | ||
retriever: fetch_parquet_metadata::RetrieveParquetMetadataIterator<'cfg>, | ||
} | ||
|
||
impl<'cfg> SplitParquetScanTasks<'cfg> { | ||
pub fn new(inputs: BoxScanTaskIter<'cfg>, cfg: &'cfg DaftExecutionConfig) -> Self { | ||
let decider = split_parquet_decision::DecideSplitIterator::new(inputs, cfg); | ||
let retriever = fetch_parquet_metadata::RetrieveParquetMetadataIterator::new(decider, cfg); | ||
SplitParquetScanTasks { retriever } | ||
} | ||
} | ||
|
||
pub struct SplitParquetScanTasksIterator<'cfg>( | ||
Flatten<fetch_parquet_metadata::RetrieveParquetMetadataIterator<'cfg>>, | ||
); | ||
|
||
impl<'cfg> IntoIterator for SplitParquetScanTasks<'cfg> { | ||
type IntoIter = SplitParquetScanTasksIterator<'cfg>; | ||
type Item = DaftResult<ScanTaskRef>; | ||
|
||
fn into_iter(self) -> Self::IntoIter { | ||
SplitParquetScanTasksIterator(self.retriever.flatten()) | ||
} | ||
} | ||
|
||
impl<'cfg> Iterator for SplitParquetScanTasksIterator<'cfg> { | ||
type Item = DaftResult<ScanTaskRef>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
self.0.next() | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
src/daft-scan/src/scan_task_iters/split_parquet/split_parquet_decision.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
use common_daft_config::DaftExecutionConfig; | ||
|
||
use crate::scan_task_iters::BoxScanTaskIter; | ||
|
||
pub(super) struct DecideSplitIterator<'cfg> { | ||
inputs: BoxScanTaskIter<'cfg>, | ||
_cfg: &'cfg DaftExecutionConfig, | ||
} | ||
|
||
impl<'cfg> DecideSplitIterator<'cfg> { | ||
pub fn new(inputs: BoxScanTaskIter<'cfg>, cfg: &'cfg DaftExecutionConfig) -> Self { | ||
Self { inputs, _cfg: cfg } | ||
} | ||
} | ||
|
||
pub(super) struct Decision {} | ||
|
||
impl<'cfg> Iterator for DecideSplitIterator<'cfg> { | ||
type Item = Decision; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
if let Some(_scan_task) = self.inputs.next() { | ||
return Some(Decision {}); | ||
} | ||
None | ||
} | ||
} |
13 changes: 13 additions & 0 deletions
13
src/daft-scan/src/scan_task_iters/split_parquet/split_parquet_file.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
use common_error::DaftResult; | ||
|
||
use crate::ScanTaskRef; | ||
|
||
pub(super) struct ParquetFileSplitter {} | ||
|
||
impl Iterator for ParquetFileSplitter { | ||
type Item = DaftResult<ScanTaskRef>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
todo!("Split the parquet file"); | ||
} | ||
} |