Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict JSON_TABLE table function parsing to MySqlDialect and AnsiDialect #1123

Closed
wants to merge 19 commits into from
Closed
20 changes: 20 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use core::fmt::{self, Display};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use sqlparser::keywords::{ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};

Expand Down Expand Up @@ -54,6 +55,7 @@ pub use self::value::{
use crate::ast::helpers::stmt_data_loading::{
DataLoadingOptions, StageLoadSelectItem, StageParamsObject,
};
use crate::keywords::Keyword;
#[cfg(feature = "visitor")]
pub use visitor::*;

Expand Down Expand Up @@ -141,6 +143,24 @@ impl Ident {
quote_style: Some(quote),
}
}

/// If this identifier is also a keyword, return the corresponding [`Keyword`].
///
/// For example even though `AVRO` is a keyword, it can also be used as an
/// identifier for a column, such as `SELECT avro FROM my_table`.
/// If this identifier is also a keyword, return the corresponding [`Keyword`].
///
/// For example even though `AVRO` is a keyword, it can also be used as an
/// identifier for a column, such as `SELECT avro FROM my_table`.
///
/// Returns `None` when the identifier (case-insensitively) matches no
/// entry in [`ALL_KEYWORDS`].
pub fn find_keyword(&self) -> Option<Keyword> {
    // `ALL_KEYWORDS` is a sorted array of uppercase strings (the crate's
    // keyword tests assert this invariant), so a binary search on the
    // uppercased identifier replaces the original linear scan — and avoids
    // allocating a fresh uppercase String for every candidate keyword.
    let uppercase = self.value.to_uppercase();
    ALL_KEYWORDS
        .binary_search(&uppercase.as_str())
        .ok()
        // `ALL_KEYWORDS_INDEX` is the parallel array mapping the same
        // position to the corresponding `Keyword` enum value.
        .map(|idx| ALL_KEYWORDS_INDEX[idx])
}
}

impl From<&str> for Ident {
Expand Down
39 changes: 36 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2771,6 +2771,27 @@
}
}

/// If the current token is the `expected` keyword followed by
/// specified tokens, consume them and returns true.
/// Otherwise, no tokens are consumed and returns false.
/// If the current token is the `expected` keyword followed by the
/// specified `tokens`, consume the whole sequence and return `true`.
/// Otherwise, consume nothing and return `false`.
pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool {
    match self.peek_token().token {
        Token::Word(w) if w.keyword == expected => {
            // Look ahead (without consuming) to verify every required
            // token follows the keyword in order.
            let lookahead_matches = tokens
                .iter()
                .enumerate()
                .all(|(idx, tok)| self.peek_nth_token(idx + 1).token == *tok);
            if !lookahead_matches {
                return false;
            }
            // Consume the keyword plus each of the matched tokens.
            for _ in 0..=tokens.len() {
                self.next_token();
            }
            true
        }
        _ => false,
    }
}

/// If the current and subsequent tokens exactly match the `keywords`
/// sequence, consume them and returns true. Otherwise, no tokens are
/// consumed and returns false
Expand All @@ -2779,7 +2800,6 @@
let index = self.index;
for &keyword in keywords {
if !self.parse_keyword(keyword) {
// println!("parse_keywords aborting .. did not find {:?}", keyword);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be removed.

// reset index and return immediately
self.index = index;
return false;
Expand Down Expand Up @@ -7506,8 +7526,9 @@
with_offset,
with_offset_alias,
})
} else if self.parse_keyword(Keyword::JSON_TABLE) {
self.expect_token(&Token::LParen)?;
} else if dialect_of!(self is MySqlDialect | AnsiDialect)
&& self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen])
{
let json_expr = self.parse_expr()?;
self.expect_token(&Token::Comma)?;
let json_path = self.parse_value()?;
Expand All @@ -7524,8 +7545,20 @@
alias,
})
} else {
let loc = self.peek_token().location;
let name = self.parse_object_name(true)?;

for ident in &name.0 {
if ident.quote_style.is_none() {
if ident.find_keyword().is_some() {
return parser_err!(
"Cannot specify a keyword as identifier for table factor",
loc
);
}
}
}

Check failure on line 7561 in src/parser/mod.rs

View workflow job for this annotation

GitHub Actions / lint

this `if` statement can be collapsed
let partitions: Vec<Ident> = if dialect_of!(self is MySqlDialect | GenericDialect)
&& self.parse_keyword(Keyword::PARTITION)
{
Expand Down
10 changes: 5 additions & 5 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -866,13 +866,13 @@ fn parse_table_identifiers() {
vec![Ident::with_quote('`', "GROUP"), Ident::new("dataField")],
);

// TODO: this should be error
// test_table_ident_err("GROUP.dataField");
viirya marked this conversation as resolved.
Show resolved Hide resolved
test_table_ident_err("GROUP.dataField");
test_table_ident_err("abc5.GROUP");

test_table_ident(
"abc5.GROUP",
viirya marked this conversation as resolved.
Show resolved Hide resolved
"abc5.`GROUP`",
None,
vec![Ident::new("abc5"), Ident::new("GROUP")],
vec![Ident::new("abc5"), Ident::with_quote('`', "GROUP")],
);

test_table_ident(
Expand Down Expand Up @@ -1205,7 +1205,7 @@ fn parse_array_agg_func() {

#[test]
fn test_select_wildcard_with_except() {
let select = bigquery_and_generic().verified_only_select("SELECT * EXCEPT (col_a) FROM data");
let select = bigquery_and_generic().verified_only_select("SELECT * EXCEPT (col_a) FROM `data`");
let expected = SelectItem::Wildcard(WildcardAdditionalOptions {
opt_except: Some(ExceptSelectItem {
first_element: Ident::new("col_a"),
Expand Down
4 changes: 2 additions & 2 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,10 +356,10 @@ fn parse_double_equal() {
#[test]
fn parse_limit_by() {
clickhouse_and_generic().verified_stmt(
r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset"#,
r#"SELECT * FROM `default`.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset"#,
);
clickhouse_and_generic().verified_stmt(
r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset, toStartOfDay(created_at)"#,
r#"SELECT * FROM `default`.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset, toStartOfDay(created_at)"#,
);
}

Expand Down
22 changes: 22 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8407,3 +8407,25 @@ fn test_buffer_reuse() {
p.parse_statements().unwrap();
let _ = p.into_tokens();
}

#[test]
fn parse_json_table_function_err() {
    // The JSON_TABLE table function is only accepted by the ANSI and
    // MySQL dialects; every other dialect must fail to parse it.
    let sql = "SELECT * FROM JSON_TABLE('[[1, 2], [3, 4]]', '$[*]' COLUMNS(a INT PATH '$[0]', b INT PATH '$[1]')) AS t";
    let unsupported_dialects =
        all_dialects_except(|d| d.is::<AnsiDialect>() || d.is::<MySqlDialect>());
    assert!(unsupported_dialects.parse_sql_statements(sql).is_err());
}

#[test]
fn parse_json_table_as_identifier() {
let parsed = all_dialects().parse_sql_statements("SELECT * FROM json_table");
assert_eq!(
ParserError::ParserError(
"Cannot specify a keyword as identifier for table factor".to_string()
),
parsed.unwrap_err()
);
Copy link
Member Author

@viirya viirya Feb 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And, even for the supported dialects, json_table cannot be used as a table identifier. It could not be used that way previously either, but the parsing error it produced was confusing.

}
Loading