diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index a5485a0cfb30..decdaf2574fe 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -2256,9 +2256,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
 
 [[package]]
 name = "sqlparser"
-version = "0.28.0"
+version = "0.29.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "249ae674b9f636b8ff64d8bfe218774cf05a26de40fd9f358669dccc4c0a9d7d"
+checksum = "9f484338c3e7772b9208d45f6e49aed0ea8439cdbe67c10ebf5505828143e113"
 dependencies = [
  "log",
 ]
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 7df95532bf13..dbcfcf094799 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -46,4 +46,4 @@ cranelift-module = { version = "0.89.0", optional = true }
 object_store = { version = "0.5.0", default-features = false, optional = true }
 parquet = { version = "29.0.0", default-features = false, optional = true }
 pyo3 = { version = "0.17.1", optional = true }
-sqlparser = "0.28"
+sqlparser = "0.29"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index eaa01ca46d14..491db7bb6800 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -90,7 +90,7 @@ pyo3 = { version = "0.17.1", optional = true }
 rand = "0.8"
 rayon = { version = "1.5", optional = true }
 smallvec = { version = "1.6", features = ["union"] }
-sqlparser = "0.28"
+sqlparser = "0.29"
 tempfile = "3"
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
 tokio-stream = "0.1"
@@ -111,7 +111,6 @@ env_logger = "0.10"
 parquet-test-utils = { path = "../../parquet-test-utils" }
 rstest = "0.16.0"
 sqllogictest = "0.10.0"
-sqlparser = "0.28"
 test-utils = { path = "../../test-utils" }
 thiserror = "1.0.37"
diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs
index e950791c8e21..82a24a04b35d 100644
--- a/datafusion/core/tests/sql/explain_analyze.rs
+++ b/datafusion/core/tests/sql/explain_analyze.rs
@@ -898,8 +898,7 @@ async fn explain_nested() {
             .set_bool(OPT_EXPLAIN_PHYSICAL_PLAN_ONLY, explain_phy_plan_flag);
         let ctx = SessionContext::with_config(config);
         let sql = "EXPLAIN explain select 1";
-        let dataframe = ctx.sql(sql).await.unwrap();
-        let err = dataframe.create_physical_plan().await.unwrap_err();
+        let err = ctx.sql(sql).await.unwrap_err();
         assert!(err.to_string().contains("Explain must be root of the plan"));
     }
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 87a72f7c54c9..02809c726515 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -39,4 +39,4 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
 arrow = { version = "29.0.0", default-features = false }
 datafusion-common = { path = "../common", version = "15.0.0" }
 log = "^0.4"
-sqlparser = "0.28"
+sqlparser = "0.29"
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
index 12f4534e63bb..fb1679888f99 100644
--- a/datafusion/sql/Cargo.toml
+++ b/datafusion/sql/Cargo.toml
@@ -41,4 +41,4 @@ arrow-schema = "29.0.0"
 datafusion-common = { path = "../common", version = "15.0.0" }
 datafusion-expr = { path = "../expr", version = "15.0.0" }
 log = "^0.4"
-sqlparser = "0.28"
+sqlparser = "0.29"
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 68e368e49807..2cc92d04ca1f 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -25,7 +25,7 @@ use sqlparser::{
     },
     dialect::{keywords::Keyword, Dialect, GenericDialect},
     parser::{Parser, ParserError},
-    tokenizer::{Token, Tokenizer},
+    tokenizer::{Token, TokenWithLocation, Tokenizer},
 };
 use std::{collections::HashMap, str::FromStr};
 use std::{collections::VecDeque, fmt};
@@ -124,7 +124,7 @@ impl<'a> DFParser<'a> {
         let tokens = tokenizer.tokenize()?;
 
         Ok(DFParser {
-            parser: Parser::new(tokens, dialect),
+            parser: Parser::new(dialect).with_tokens(tokens),
         })
     }
@@ -165,13 +165,17 @@ impl<'a> DFParser<'a> {
     }
 
     /// Report an unexpected token
-    fn expected<T>(&self, expected: &str, found: Token) -> Result<T, ParserError> {
+    fn expected<T>(
+        &self,
+        expected: &str,
+        found: TokenWithLocation,
+    ) -> Result<T, ParserError> {
         parser_err!(format!("Expected {expected}, found: {found}"))
     }
 
     /// Parse a new expression
     pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
-        match self.parser.peek_token() {
+        match self.parser.peek_token().token {
             Token::Word(w) => {
                 match w.keyword {
                     Keyword::CREATE => {
@@ -227,7 +231,7 @@ impl<'a> DFParser<'a> {
         }
 
         loop {
-            if let Token::Word(_) = self.parser.peek_token() {
+            if let Token::Word(_) = self.parser.peek_token().token {
                 let identifier = self.parser.parse_identifier()?;
                 partitions.push(identifier.to_string());
             } else {
@@ -262,7 +266,7 @@ impl<'a> DFParser<'a> {
         loop {
             if let Some(constraint) = self.parser.parse_optional_table_constraint()? {
                 constraints.push(constraint);
-            } else if let Token::Word(_) = self.parser.peek_token() {
+            } else if let Token::Word(_) = self.parser.peek_token().token {
                 let column_def = self.parse_column_def()?;
                 columns.push(column_def);
             } else {
@@ -379,9 +383,10 @@ impl<'a> DFParser<'a> {
 
     /// Parses the set of valid formats
     fn parse_file_format(&mut self) -> Result<String, ParserError> {
-        match self.parser.next_token() {
+        let token = self.parser.next_token();
+        match &token.token {
             Token::Word(w) => parse_file_type(&w.value),
-            unexpected => self.expected("one of PARQUET, NDJSON, or CSV", unexpected),
+            _ => self.expected("one of PARQUET, NDJSON, or CSV", token),
         }
     }
@@ -389,9 +394,10 @@ impl<'a> DFParser<'a> {
     fn parse_file_compression_type(
         &mut self,
     ) -> Result<CompressionTypeVariant, ParserError> {
-        match self.parser.next_token() {
+        let token = self.parser.next_token();
+        match &token.token {
             Token::Word(w) => CompressionTypeVariant::from_str(&w.value),
-            unexpected => self.expected("one of GZIP, BZIP2, XZ", unexpected),
+            _ => self.expected("one of GZIP, BZIP2, XZ", token),
         }
     }
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 0ee5913cb64b..3bd5edbc5b3c 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -2235,15 +2235,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             normalize_ident(function.name.0[0].clone())
         };
 
-        // first, check SQL reserved words
-        if name == "rollup" {
-            let args = self.function_args_to_expr(function.args, schema)?;
-            return Ok(Expr::GroupingSet(GroupingSet::Rollup(args)));
-        } else if name == "cube" {
-            let args = self.function_args_to_expr(function.args, schema)?;
-            return Ok(Expr::GroupingSet(GroupingSet::Cube(args)));
-        }
-
         // next, scalar built-in
         if let Ok(fun) = BuiltinScalarFunction::from_str(&name) {
             let args = self.function_args_to_expr(function.args, schema)?;
@@ -2347,6 +2338,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 }
             }
 
+            SQLExpr::Rollup(exprs) => self.sql_rollup_to_expr(exprs, schema, planner_context),
+            SQLExpr::Cube(exprs) => self.sql_cube_to_expr(exprs, schema, planner_context),
+            SQLExpr::GroupingSets(exprs) => self.sql_grouping_sets_to_expr(exprs, schema, planner_context),
+
             SQLExpr::Floor { expr, field: _field } => {
                 let fun = BuiltinScalarFunction::Floor;
                 let args = vec![self.sql_expr_to_logical_expr(*expr, schema, planner_context)?];
@@ -2387,6 +2382,67 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         })
     }
 
+    fn sql_rollup_to_expr(
+        &self,
+        exprs: Vec<Vec<SQLExpr>>,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args: Result<Vec<Expr>> = exprs
+            .into_iter()
+            .map(|v| {
+                if v.len() != 1 {
+                    Err(DataFusionError::Internal(
+                        "Tuple expressions are not supported for Rollup expressions"
+                            .to_string(),
+                    ))
+                } else {
+                    self.sql_expr_to_logical_expr(v[0].clone(), schema, planner_context)
+                }
+            })
+            .collect();
+        Ok(Expr::GroupingSet(GroupingSet::Rollup(args?)))
+    }
+
+    fn sql_cube_to_expr(
+        &self,
+        exprs: Vec<Vec<SQLExpr>>,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args: Result<Vec<Expr>> = exprs
+            .into_iter()
+            .map(|v| {
+                if v.len() != 1 {
+                    Err(DataFusionError::Internal(
+                        "Tuple expressions are not supported for Cube expressions"
+                            .to_string(),
+                    ))
+                } else {
+                    self.sql_expr_to_logical_expr(v[0].clone(), schema, planner_context)
+                }
+            })
+            .collect();
+        Ok(Expr::GroupingSet(GroupingSet::Cube(args?)))
+    }
+
+    fn sql_grouping_sets_to_expr(
+        &self,
+        exprs: Vec<Vec<SQLExpr>>,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let args: Result<Vec<Vec<Expr>>> = exprs
+            .into_iter()
+            .map(|v| {
+                v.into_iter()
+                    .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context))
+                    .collect()
+            })
+            .collect();
+        Ok(Expr::GroupingSet(GroupingSet::GroupingSets(args?)))
+    }
+
     fn parse_exists_subquery(
         &self,
         subquery: Query,
@@ -2634,6 +2690,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             SQLExpr::Identifier(i) => i.to_string(),
             SQLExpr::Value(v) => match v {
                 Value::SingleQuotedString(s) => s.to_string(),
+                Value::DollarQuotedString(s) => s.to_string(),
                 Value::Number(_, _) | Value::Boolean(_) => v.to_string(),
                 Value::DoubleQuotedString(_)
                 | Value::UnQuotedString(_)
@@ -5664,11 +5721,12 @@ mod tests {
         quick_test(sql, expected);
     }
 
-    #[ignore] // see https://github.com/apache/arrow-datafusion/issues/2469
     #[test]
     fn aggregate_with_grouping_sets() {
         let sql = "SELECT id, state, age, COUNT(*) FROM person GROUP BY id, GROUPING SETS ((state), (state, age), (id, state))";
-        let expected = "TBD";
+        let expected = "Projection: person.id, person.state, person.age, COUNT(UInt8(1))\
+        \n  Aggregate: groupBy=[[person.id, GROUPING SETS ((person.state), (person.state, person.age), (person.id, person.state))]], aggr=[[COUNT(UInt8(1))]]\
+        \n    TableScan: person";
         quick_test(sql, expected);
     }