Skip to content

Commit

Permalink
databricks: Initial support for Databricks dialect
Browse files Browse the repository at this point in the history
  • Loading branch information
lustefaniak committed Mar 19, 2024
1 parent d1dc596 commit c26258f
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/ast/helpers/stmt_create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pub struct CreateTableBuilder {
pub strict: bool,
pub table_ttl: Option<Expr>,
pub clickhouse_settings: Option<Vec<SqlOption>>,
pub using: Option<ObjectName>,
}

impl CreateTableBuilder {
Expand Down Expand Up @@ -120,6 +121,7 @@ impl CreateTableBuilder {
strict: false,
table_ttl: None,
clickhouse_settings: None,
using: None,
}
}
pub fn or_replace(mut self, or_replace: bool) -> Self {
Expand Down Expand Up @@ -285,6 +287,11 @@ impl CreateTableBuilder {
self
}

pub fn using(mut self, using: Option<ObjectName>) -> Self {
self.using = using;
self
}

pub fn build(self) -> Statement {
Statement::CreateTable {
or_replace: self.or_replace,
Expand Down Expand Up @@ -321,6 +328,7 @@ impl CreateTableBuilder {
strict: self.strict,
table_ttl: self.table_ttl,
clickhouse_settings: self.clickhouse_settings,
using: self.using,
}
}
}
Expand Down Expand Up @@ -367,6 +375,7 @@ impl TryFrom<Statement> for CreateTableBuilder {
strict,
table_ttl,
clickhouse_settings,
using,
} => Ok(Self {
or_replace,
temporary,
Expand Down Expand Up @@ -402,6 +411,7 @@ impl TryFrom<Statement> for CreateTableBuilder {
strict,
table_ttl,
clickhouse_settings,
using,
}),
_ => Err(ParserError::ParserError(format!(
"Expected create table statement, but received: {stmt}"
Expand Down
8 changes: 8 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,8 @@ pub enum Statement {
table_ttl: Option<Expr>,
/// SETTINGS k = v, k2 = v2...
clickhouse_settings: Option<Vec<SqlOption>>,
/// `USING <data_source>` clause, e.g. Databricks `CREATE TABLE t (...) USING DELTA`
using: Option<ObjectName>,
},
/// SQLite's `CREATE VIRTUAL TABLE .. USING <module_name> (<module_args>)`
CreateVirtualTable {
Expand Down Expand Up @@ -2631,6 +2633,7 @@ impl fmt::Display for Statement {
strict,
table_ttl,
clickhouse_settings,
using,
} => {
// We want to allow the following options
// Empty column list, allowed by PostgreSQL:
Expand Down Expand Up @@ -2675,6 +2678,11 @@ impl fmt::Display for Statement {
// PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens
write!(f, " ()")?;
}

if let Some(using) = using {
write!(f, " USING {using}")?;
}

// Only for SQLite
if *without_rowid {
write!(f, " WITHOUT ROWID")?;
Expand Down
30 changes: 30 additions & 0 deletions src/dialect/databricks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::dialect::Dialect;

/// A [`Dialect`] for Databricks SQL.
///
/// Identifier rules are modelled on the Databricks SQL identifier reference:
/// <https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html>
#[derive(Debug)]
pub struct DatabricksDialect {}

impl Dialect for DatabricksDialect {
    /// Delimited (quoted) identifiers are opened with a backtick.
    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        ch == '`'
    }

    /// An unquoted identifier may begin with an ASCII letter or an underscore.
    ///
    /// The underscore must be accepted here; otherwise names such as `_tmp`
    /// would not be recognised as identifiers even though underscore is
    /// accepted in every later position by `is_identifier_part`.
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }

    /// After the first character, ASCII letters, ASCII digits and underscores
    /// continue an unquoted identifier.
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }
}
3 changes: 3 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
mod ansi;
mod bigquery;
mod clickhouse;
mod databricks;
mod duckdb;
mod generic;
mod hive;
Expand All @@ -32,6 +33,7 @@ use core::str::Chars;
pub use self::ansi::AnsiDialect;
pub use self::bigquery::BigQueryDialect;
pub use self::clickhouse::ClickHouseDialect;
pub use self::databricks::DatabricksDialect;
pub use self::duckdb::DuckDbDialect;
pub use self::generic::GenericDialect;
pub use self::hive::HiveDialect;
Expand Down Expand Up @@ -174,6 +176,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect
"bigquery" => Some(Box::new(BigQueryDialect)),
"ansi" => Some(Box::new(AnsiDialect {})),
"duckdb" => Some(Box::new(DuckDbDialect {})),
"databricks" => Some(Box::new(DatabricksDialect {})),
_ => None,
}
}
Expand Down
8 changes: 8 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4002,13 +4002,20 @@ impl<'a> Parser<'a> {
// parse optional column list (schema)
let (columns, constraints) = self.parse_columns()?;

let using = if self.parse_keyword(Keyword::USING) {
Some(self.parse_object_name()?)
} else {
None
};

// SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE`
let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]);

let hive_distribution = self.parse_hive_distribution()?;
let hive_formats = self.parse_hive_formats()?;
// PostgreSQL supports `WITH ( options )`, before `AS`
let with_options = self.parse_options(Keyword::WITH)?;

let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?;

let engine = if self.parse_keyword(Keyword::ENGINE) {
Expand Down Expand Up @@ -4195,6 +4202,7 @@ impl<'a> Parser<'a> {
.strict(strict)
.table_ttl(table_ttl)
.clickhouse_settings(clickhouse_settings)
.using(using)
.build())
}

Expand Down
55 changes: 55 additions & 0 deletions tests/sqlparser_databricks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![warn(clippy::all)]
//! Test SQL syntax specific to Databricks. The parser based on the
//! generic dialect is also tested (on the inputs it can handle).

#[cfg(test)]
use pretty_assertions::assert_eq;
use sqlparser::ast::*;
use sqlparser::dialect::{DatabricksDialect, GenericDialect};
use test_utils::*;

#[macro_use]
mod test_utils;

/// Builds a [`TestedDialects`] holding only [`DatabricksDialect`], with no
/// parser options set.
fn databricks() -> TestedDialects {
    let parser_options = None;
    TestedDialects {
        dialects: vec![Box::new(DatabricksDialect {})],
        options: parser_options,
    }
}

/// Builds a [`TestedDialects`] holding [`DatabricksDialect`] followed by
/// [`GenericDialect`], with no parser options set.
fn databricks_and_generic() -> TestedDialects {
    let parser_options = None;
    TestedDialects {
        dialects: vec![
            Box::new(DatabricksDialect {}),
            Box::new(GenericDialect {}),
        ],
        options: parser_options,
    }
}

// `CREATE TABLE ... USING <source> TBLPROPERTIES (...)` must round-trip and
// expose both the qualified table name and the `USING` target in the AST.
#[test]
fn test_databricks_create_table() {
    let sql = "CREATE TABLE main.dbt_lukasz.customers (customer_id BIGINT, customer_lifetime_value DOUBLE) USING delta TBLPROPERTIES ('delta.minReaderVersion' = '3', 'delta.minWriterVersion' = '7')";
    match databricks_and_generic().verified_stmt(sql) {
        Statement::CreateTable { name, using, .. } => {
            assert_eq!("main.dbt_lukasz.customers", name.to_string());
            // Round-tripping alone does not pin where `delta` lands in the AST,
            // so check the `using` field explicitly.
            assert_eq!(Some("delta".to_string()), using.map(|u| u.to_string()));
        }
        _ => unreachable!(),
    }
}

#[test]
fn test_identifiers() {
    // A three-part name made of backtick-delimited identifiers is passed
    // through the Databricks-only `verified_stmt` check.
    databricks().verified_stmt("SELECT * FROM `main`.`dbt_lukasz`.`raw_orders`");
}

0 comments on commit c26258f

Please sign in to comment.