diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index c585917a1ed0..67fa1325eea7 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -175,7 +175,7 @@ pub(crate) type LexOrdering = Vec; /// [ WITH HEADER ROW ] /// [ DELIMITER ] /// [ COMPRESSION TYPE ] -/// [ PARTITIONED BY () ] +/// [ PARTITIONED BY ( | ) ] /// [ WITH ORDER () /// [ OPTIONS () ] /// LOCATION @@ -693,7 +693,7 @@ impl<'a> DFParser<'a> { self.parser .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parser.parse_object_name(true)?; - let (columns, constraints) = self.parse_columns()?; + let (mut columns, constraints) = self.parse_columns()?; #[derive(Default)] struct Builder { @@ -754,7 +754,30 @@ impl<'a> DFParser<'a> { Keyword::PARTITIONED => { self.parser.expect_keyword(Keyword::BY)?; ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?; - builder.table_partition_cols = Some(self.parse_partitions()?); + // Expects either list of column names (col_name [, col_name]*) + // or list of column definitions (col_name datatype [, col_name datatype]* ) + // use the token after the name to decide which parsing rule to use + // Note that mixing both names and definitions is not allowed + let peeked = self.parser.peek_nth_token(2); + if peeked == Token::Comma || peeked == Token::RParen { + // list of column names + builder.table_partition_cols = Some(self.parse_partitions()?) + } else { + // list of column defs + let (cols, cons) = self.parse_columns()?; + builder.table_partition_cols = Some( + cols.iter().map(|col| col.name.to_string()).collect(), + ); + + columns.extend(cols); + + if !cons.is_empty() { + return Err(ParserError::ParserError( + "Constraints on Partition Columns are not supported" + .to_string(), + )); + } + } } Keyword::OPTIONS => { ensure_not_set(&builder.options, "OPTIONS")?; @@ -1167,9 +1190,37 @@ mod tests { }); expect_parse_ok(sql, expected)?; - // Error cases: partition column does not support type + // positive case: column definiton allowed in 'partition by' clause let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; + let expected = Statement::CreateExternalTable(CreateExternalTable { + name: "t".into(), + columns: vec![ + make_column_def("c1", DataType::Int(None)), + make_column_def("p1", DataType::Int(None)), + ], + file_type: "CSV".to_string(), + has_header: false, + delimiter: ',', + location: "foo.csv".into(), + table_partition_cols: vec!["p1".to_string()], + order_exprs: vec![], + if_not_exists: false, + file_compression_type: UNCOMPRESSED, + unbounded: false, + options: HashMap::new(), + constraints: vec![], + }); + expect_parse_ok(sql, expected)?; + + // negative case: mixed column defs and column names in `PARTITIONED BY` clause + let sql = + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'"; + expect_parse_error(sql, "sql parser error: Expected a data type name, found: )"); + + // negative case: mixed column defs and column names in `PARTITIONED BY` clause + let sql = + "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'"; expect_parse_error(sql, "sql parser error: Expected ',' or ')' after partition definition, found: int"); // positive case: additional options (one entry) can be specified diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index c4a26a5e227d..a200217af6e1 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -100,9 +100,17 @@ CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH LOCATION 'foo.csv'; statement error DataFusion error: SQL error: ParserError\("Unexpected token FOOBAR"\) CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV FOOBAR BARBAR BARFOO LOCATION 'foo.csv'; +# Missing partition column +statement error DataFusion error: Arrow error: Schema error: Unable to get field named "c2". Valid fields: \["c1"\] +create EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c2) LOCATION 'foo.csv' + +# Duplicate Column in `PARTITIONED BY` clause +statement error DataFusion error: Schema error: Schema contains duplicate unqualified field name c1 +create EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV PARTITIONED BY (c1 int) LOCATION 'foo.csv' + # Conflicting options statement error DataFusion error: Invalid or Unsupported Configuration: Config value "column_index_truncate_length" not found on CsvOptions CREATE EXTERNAL TABLE csv_table (column1 int) STORED AS CSV LOCATION 'foo.csv' -OPTIONS ('format.delimiter' ';', 'format.column_index_truncate_length' '123') +OPTIONS ('format.delimiter' ';', 'format.column_index_truncate_length' '123') \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index dc60bafaa8db..4b9af3bdeafb 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -42,7 +42,7 @@ LOCATION '../../testing/data/csv/aggregate_test_100.csv' statement ok -create table dictionary_encoded_values as values +create table dictionary_encoded_values as values ('a', arrow_cast('foo', 'Dictionary(Int32, Utf8)')), ('b', arrow_cast('bar', 'Dictionary(Int32, Utf8)')); query TTT @@ -55,13 +55,13 @@ statement ok CREATE EXTERNAL TABLE dictionary_encoded_parquet_partitioned( a varchar, b varchar, -) +) STORED AS parquet LOCATION 'test_files/scratch/insert_to_external/parquet_types_partitioned/' PARTITIONED BY (b); query TT -insert into dictionary_encoded_parquet_partitioned +insert into dictionary_encoded_parquet_partitioned select * from dictionary_encoded_values ---- 2 @@ -76,13 +76,13 @@ statement ok CREATE EXTERNAL TABLE dictionary_encoded_arrow_partitioned( a varchar, b varchar, -) +) STORED AS arrow LOCATION 'test_files/scratch/insert_to_external/arrow_dict_partitioned/' PARTITIONED BY (b); query TT -insert into dictionary_encoded_arrow_partitioned +insert into dictionary_encoded_arrow_partitioned select * from dictionary_encoded_values ---- 2 @@ -90,7 +90,7 @@ select * from dictionary_encoded_values statement ok CREATE EXTERNAL TABLE dictionary_encoded_arrow_test_readback( a varchar, -) +) STORED AS arrow LOCATION 'test_files/scratch/insert_to_external/arrow_dict_partitioned/b=bar/'; @@ -185,6 +185,30 @@ select * from partitioned_insert_test_verify; 1 2 +statement ok +CREATE EXTERNAL TABLE +partitioned_insert_test_hive(c bigint) +STORED AS csv +LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned' +PARTITIONED BY (a string, b string); + +query ITT +INSERT INTO partitioned_insert_test_hive VALUES (3,30,300); +---- +1 + +query ITT +SELECT * FROM partitioned_insert_test_hive order by a,b,c; +---- +1 10 100 +1 10 200 +1 20 100 +2 20 100 +1 20 200 +2 20 200 +3 30 300 + + statement ok CREATE EXTERNAL TABLE partitioned_insert_test_json(a string, b string)