diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index deec84d5a0ff8..e800e3cff224e 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -3634,6 +3634,130 @@ mod tests { assert_eq!(result[0].schema().metadata(), result[1].schema().metadata()); } + #[tokio::test] + async fn case_insensitive_columns_identifiers() { + // create local execution context + let mut ctx = ExecutionContext::new(); + + // register csv file with the execution context + ctx.register_csv( + "case_insensitive_test", + "tests/example.csv", + CsvReadOptions::new(), + ) + .await + .unwrap(); + + let sql = "SELECT A, b FROM case_insensitive_test"; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + let sql = "SELECT t.A, b FROM case_insensitive_test AS t"; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + // All identifiers should be lowercase except when quoted + + let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t"; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| x | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t"; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| x | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| X | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + + // Order by column references should be case insensitive + + let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x"; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| x | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X"; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| x | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + + let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#; + let result = plan_and_collect(&mut ctx, sql) + .await + .expect("ran plan correctly"); + let expected = vec![ + "+---+---+", + "| X | b |", + "+---+---+", + "| 1 | 2 |", + "+---+---+", + ]; + assert_batches_sorted_eq!(expected, &result); + } + struct MyPhysicalPlanner {} #[async_trait] diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 462977274ecbb..ddc1eb5a4b7e8 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema), SelectItem::ExprWithAlias { expr, alias } => Ok(Alias( Box::new(self.sql_to_rex(expr, schema)?), - alias.value.clone(), + normalize_ident(alias), )), SelectItem::Wildcard => Ok(Expr::Wildcard), SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented( @@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Identifier(ref id) => { if id.value.starts_with('@') { + // TODO: figure out if ScalarVariables should be insensitive. let var_names = vec![id.value.clone()]; Ok(Expr::ScalarVariable(var_names)) } else { @@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // identifier. (e.g. it is "foo.bar" not foo.bar) Ok(Expr::Column(Column { relation: None, - name: id.value.clone(), + name: normalize_ident(id), })) } } @@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SQLExpr::CompoundIdentifier(ids) => { - let mut var_names: Vec<_> = - ids.iter().map(|id| id.value.clone()).collect(); + let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect(); if &var_names[0][0..1] == "@" { Ok(Expr::ScalarVariable(var_names)) @@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // (e.g. "foo.bar") for function names yet function.name.to_string() } else { - // if there is a quote style, then don't normalize - // the name, otherwise normalize to lowercase - let ident = &function.name.0[0]; - match ident.quote_style { - Some(_) => ident.value.clone(), - None => ident.value.to_ascii_lowercase(), - } + normalize_ident(&function.name.0[0]) }; // first, scalar built-in @@ -2176,6 +2170,14 @@ pub fn convert_data_type(sql_type: &SQLDataType) -> Result { } } +// Normalize an identifer to a lowercase string unless the identifier is quoted. +fn normalize_ident(id: &Ident) -> String { + match id.quote_style { + Some(_) => id.value.clone(), + None => id.value.to_ascii_lowercase(), + } +} + #[cfg(test)] mod tests { use functions::ScalarFunctionImplementation;