Skip to content

Commit

Permalink
[BugFix] abort plan if order by column not in select list (#5132)
Browse files Browse the repository at this point in the history
Currently, if the order by column is not in select list, we will
always add_missing_columns to the select list. This will lead to the bug in issue #5065.

In this pr, we modify the behaviour, for select distinct, we will throw error msg
if order by expressions are not int select list.

Signed-off-by: xyz <[email protected]>
  • Loading branch information
xiaoyong-z authored Feb 11, 2023
1 parent ec9119d commit 4be5610
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 1 deletion.
34 changes: 34 additions & 0 deletions datafusion/core/tests/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use arrow::{
record_batch::RecordBatch,
};
use datafusion::from_slice::FromSlice;
use datafusion_common::DataFusionError;
use std::sync::Arc;

use datafusion::dataframe::DataFrame;
Expand Down Expand Up @@ -127,6 +128,39 @@ async fn sort_on_unprojected_columns() -> Result<()> {
Ok(())
}

#[tokio::test]
async fn sort_on_distinct_unprojected_columns() -> Result<()> {
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
]);

let batch = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
Arc::new(Int32Array::from_slice([1, 10, 10, 100])),
Arc::new(Int32Array::from_slice([2, 12, 12, 120])),
],
)
.unwrap();

let ctx = SessionContext::new();
ctx.register_batch("t", batch).unwrap();

assert!(matches!(
ctx.table("t")
.await
.unwrap()
.select(vec![col("a")])
.unwrap()
.distinct()
.unwrap()
.sort(vec![Expr::Sort(Sort::new(Box::new(col("b")), false, true))]),
Err(DataFusionError::Plan(_))
));
Ok(())
}

#[tokio::test]
async fn filter_with_alias_overwrite() -> Result<()> {
let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
Expand Down
27 changes: 26 additions & 1 deletion datafusion/core/tests/sqllogictests/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,29 @@ query IC rowsort
VALUES (1,'a'),(2,'b')
----
1 a
2 b
2 b

# table foo for distinct order by
statement ok
CREATE TABLE foo AS VALUES
(1, 2),
(3, 4),
(5, 6);

# foo distinct
query I
select distinct '1' from foo;
----
1

# foo order by
query I
select '1' from foo order by column1;
----
1
1
1

# foo distinct order by
statement error DataFusion error: Error during planning: For SELECT DISTINCT, ORDER BY expressions column1 must appear in select list
select distinct '1' from foo order by column1;
22 changes: 22 additions & 0 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,28 @@ impl LogicalPlanBuilder {
Ok(())
})?;

// if current plan is distinct or current plan is repartition and its child plan is distinct,
// then this plan is a select distinct plan
let is_select_distinct = match self.plan {
LogicalPlan::Distinct(_) => true,
LogicalPlan::Repartition(Repartition { ref input, .. }) => {
matches!(input.as_ref(), &LogicalPlan::Distinct(_))
}
_ => false,
};

// for select distinct, order by expressions must exist in select list
if is_select_distinct && !missing_cols.is_empty() {
let missing_col_names = missing_cols
.iter()
.map(|col| col.flat_name())
.collect::<String>();
let error_msg = format!(
"For SELECT DISTINCT, ORDER BY expressions {missing_col_names} must appear in select list",
);
return Err(DataFusionError::Plan(error_msg));
}

if missing_cols.is_empty() {
return Ok(Self::from(LogicalPlan::Sort(Sort {
expr: normalize_cols(exprs, &self.plan)?,
Expand Down
22 changes: 22 additions & 0 deletions datafusion/sql/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3213,6 +3213,28 @@ fn test_select_join_key_inner_join() {
quick_test(sql, expected);
}

#[test]
fn test_select_order_by() {
let sql = "SELECT '1' from person order by id";

let expected = "Projection: Utf8(\"1\")\n Sort: person.id ASC NULLS LAST\n Projection: Utf8(\"1\"), person.id\n TableScan: person";
quick_test(sql, expected);
}

#[test]
fn test_select_distinct_order_by() {
let sql = "SELECT distinct '1' from person order by id";

let expected =
"Error during planning: For SELECT DISTINCT, ORDER BY expressions id must appear in select list";

// It should return error.
let result = logical_plan(sql);
assert!(result.is_err());
let err = result.err().unwrap();
assert_eq!(err.to_string(), expected);
}

#[test]
fn test_duplicated_left_join_key_inner_join() {
// person.id * 2 happen twice in left side.
Expand Down

0 comments on commit 4be5610

Please sign in to comment.