Skip to content

Commit

Permalink
feat: add limit and first
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewgazelka committed Nov 21, 2024
1 parent f6eb993 commit 5f28376
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
15 changes: 14 additions & 1 deletion src/daft-connect/src/translation/logical_plan.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use daft_logical_plan::LogicalPlanBuilder;
use eyre::{bail, ensure, Context};
use spark_connect::{relation::RelType, Range, Relation};
use spark_connect::{relation::RelType, Limit, Range, Relation};
use tracing::warn;

pub fn to_logical_plan(relation: Relation) -> eyre::Result<LogicalPlanBuilder> {
Expand All @@ -13,11 +13,24 @@ pub fn to_logical_plan(relation: Relation) -> eyre::Result<LogicalPlanBuilder> {
};

match rel_type {
RelType::Limit(l) => limit(*l).wrap_err("Failed to apply limit to logical plan"),
RelType::Range(r) => range(r).wrap_err("Failed to apply range to logical plan"),
plan => bail!("Unsupported relation type: {plan:?}"),
}
}

fn limit(limit: Limit) -> eyre::Result<LogicalPlanBuilder> {
let Limit { input, limit } = limit;

let Some(input) = input else {
bail!("input must be set");
};

let plan = to_logical_plan(*input)?.limit(i64::from(limit), false)?; // todo: eager or no

Ok(plan)
}

fn range(range: Range) -> eyre::Result<LogicalPlanBuilder> {
#[cfg(not(feature = "python"))]
bail!("Range operations require Python feature to be enabled");
Expand Down
13 changes: 13 additions & 0 deletions tests/connect/test_range_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,16 @@ def test_range_operation(spark_session):
# Verify the DataFrame has expected values
assert len(pandas_df) == 10, "DataFrame should have 10 rows"
assert list(pandas_df["id"]) == list(range(10)), "DataFrame should contain values 0-9"


def test_range_first(spark_session):
spark_range = spark_session.range(10)
first_row = spark_range.first()
assert first_row["id"] == 0, "First row should have id=0"


def test_range_limit(spark_session):
spark_range = spark_session.range(10)
limited_df = spark_range.limit(5).toPandas()
assert len(limited_df) == 5, "Limited DataFrame should have 5 rows"
assert list(limited_df["id"]) == list(range(5)), "Limited DataFrame should contain values 0-4"

0 comments on commit 5f28376

Please sign in to comment.