Skip to content

Commit

Permalink
fix: Case insensitive unquoted identifiers (#1747)
Browse files Browse the repository at this point in the history
  • Loading branch information
mkmik authored Feb 7, 2022
1 parent fe46a1e commit d014ff2
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 12 deletions.
167 changes: 167 additions & 0 deletions datafusion/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3359,6 +3359,173 @@ mod tests {
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
}

#[tokio::test]
async fn normalized_column_identifiers() {
// create local execution context
let mut ctx = ExecutionContext::new();

// register csv file with the execution context
ctx.register_csv(
"case_insensitive_test",
"tests/example.csv",
CsvReadOptions::new(),
)
.await
.unwrap();

let sql = "SELECT A, b FROM case_insensitive_test";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Aliases

let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Order by

let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Where

let sql = "SELECT a, b FROM case_insensitive_test where A IS NOT null";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// Group by

let sql = "SELECT a as x, count(*) as c FROM case_insensitive_test GROUP BY X";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | c |",
"+---+---+",
"| 1 | 1 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql =
r#"SELECT a as "X", count(*) as c FROM case_insensitive_test GROUP BY "X""#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | c |",
"+---+---+",
"| 1 | 1 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);
}

struct MyPhysicalPlanner {}

#[async_trait]
Expand Down
18 changes: 6 additions & 12 deletions datafusion/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::logical_plan::{
use crate::optimizer::utils::exprlist_to_columns;
use crate::prelude::JoinType;
use crate::scalar::ScalarValue;
use crate::sql::utils::make_decimal_type;
use crate::sql::utils::{make_decimal_type, normalize_ident};
use crate::{
error::{DataFusionError, Result},
physical_plan::udaf::AggregateUDF,
Expand Down Expand Up @@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
Box::new(self.sql_to_rex(expr, schema)?),
alias.value.clone(),
normalize_ident(alias),
)),
SelectItem::Wildcard => Ok(Expr::Wildcard),
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
Expand Down Expand Up @@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

SQLExpr::Identifier(ref id) => {
if id.value.starts_with('@') {
// TODO: figure out if ScalarVariables should be insensitive.
let var_names = vec![id.value.clone()];
Ok(Expr::ScalarVariable(var_names))
} else {
Expand All @@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// identifier. (e.g. it is "foo.bar" not foo.bar)
Ok(Expr::Column(Column {
relation: None,
name: id.value.clone(),
name: normalize_ident(id),
}))
}
}
Expand All @@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}

SQLExpr::CompoundIdentifier(ids) => {
let mut var_names: Vec<_> =
ids.iter().map(|id| id.value.clone()).collect();
let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();

if &var_names[0][0..1] == "@" {
Ok(Expr::ScalarVariable(var_names))
Expand Down Expand Up @@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// (e.g. "foo.bar") for function names yet
function.name.to_string()
} else {
// if there is a quote style, then don't normalize
// the name, otherwise normalize to lowercase
let ident = &function.name.0[0];
match ident.quote_style {
Some(_) => ident.value.clone(),
None => ident.value.to_ascii_lowercase(),
}
normalize_ident(&function.name.0[0])
};

// first, scalar built-in
Expand Down
9 changes: 9 additions & 0 deletions datafusion/src/sql/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! SQL Utility Functions
use arrow::datatypes::DataType;
use sqlparser::ast::Ident;

use crate::logical_plan::{Expr, LogicalPlan};
use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
Expand Down Expand Up @@ -532,6 +533,14 @@ pub(crate) fn make_decimal_type(
}
}

// Normalize an identifer to a lowercase string unless the identifier is quoted.
pub(crate) fn normalize_ident(id: &Ident) -> String {
match id.quote_style {
Some(_) => id.value.clone(),
None => id.value.to_ascii_lowercase(),
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down

0 comments on commit d014ff2

Please sign in to comment.