Skip to content

Commit

Permalink
fix: Case insensitive unquoted identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
Marko Mikulicic committed Feb 7, 2022
1 parent e4a056f commit 56a8b9a
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 11 deletions.
124 changes: 124 additions & 0 deletions datafusion/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3634,6 +3634,130 @@ mod tests {
assert_eq!(result[0].schema().metadata(), result[1].schema().metadata());
}

#[tokio::test]
async fn case_insensitive_columns_identifiers() {
// create local execution context
let mut ctx = ExecutionContext::new();

// register csv file with the execution context
ctx.register_csv(
"case_insensitive_test",
"tests/example.csv",
CsvReadOptions::new(),
)
.await
.unwrap();

let sql = "SELECT A, b FROM case_insensitive_test";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| a | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

// All identifiers should be lowercase except when quoted

let sql = "SELECT t.A as x, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A AS X, b FROM case_insensitive_test AS t";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t"#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);


// Order by column references should be case insensitive

let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY x";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = "SELECT t.A AS x, b FROM case_insensitive_test AS t ORDER BY X";
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| x | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);

let sql = r#"SELECT t.A AS "X", b FROM case_insensitive_test AS t ORDER BY "X""#;
let result = plan_and_collect(&mut ctx, sql)
.await
.expect("ran plan correctly");
let expected = vec![
"+---+---+",
"| X | b |",
"+---+---+",
"| 1 | 2 |",
"+---+---+",
];
assert_batches_sorted_eq!(expected, &result);
}

struct MyPhysicalPlanner {}

#[async_trait]
Expand Down
24 changes: 13 additions & 11 deletions datafusion/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SelectItem::UnnamedExpr(expr) => self.sql_to_rex(expr, schema),
SelectItem::ExprWithAlias { expr, alias } => Ok(Alias(
Box::new(self.sql_to_rex(expr, schema)?),
alias.value.clone(),
normalize_ident(alias),
)),
SelectItem::Wildcard => Ok(Expr::Wildcard),
SelectItem::QualifiedWildcard(_) => Err(DataFusionError::NotImplemented(
Expand Down Expand Up @@ -1392,6 +1392,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

SQLExpr::Identifier(ref id) => {
if id.value.starts_with('@') {
// TODO: figure out if ScalarVariables should be insensitive.
let var_names = vec![id.value.clone()];
Ok(Expr::ScalarVariable(var_names))
} else {
Expand All @@ -1401,7 +1402,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// identifier. (e.g. it is "foo.bar" not foo.bar)
Ok(Expr::Column(Column {
relation: None,
name: id.value.clone(),
name: normalize_ident(id),
}))
}
}
Expand All @@ -1418,8 +1419,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}

SQLExpr::CompoundIdentifier(ids) => {
let mut var_names: Vec<_> =
ids.iter().map(|id| id.value.clone()).collect();
let mut var_names: Vec<_> = ids.iter().map(normalize_ident).collect();

if &var_names[0][0..1] == "@" {
Ok(Expr::ScalarVariable(var_names))
Expand Down Expand Up @@ -1639,13 +1639,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// (e.g. "foo.bar") for function names yet
function.name.to_string()
} else {
// if there is a quote style, then don't normalize
// the name, otherwise normalize to lowercase
let ident = &function.name.0[0];
match ident.quote_style {
Some(_) => ident.value.clone(),
None => ident.value.to_ascii_lowercase(),
}
normalize_ident(&function.name.0[0])
};

// first, scalar built-in
Expand Down Expand Up @@ -2176,6 +2170,14 @@ pub fn convert_data_type(sql_type: &SQLDataType) -> Result<DataType> {
}
}

// Normalize an identifer to a lowercase string unless the identifier is quoted.
fn normalize_ident(id: &Ident) -> String {
match id.quote_style {
Some(_) => id.value.clone(),
None => id.value.to_ascii_lowercase(),
}
}

#[cfg(test)]
mod tests {
use functions::ScalarFunctionImplementation;
Expand Down

0 comments on commit 56a8b9a

Please sign in to comment.