Skip to content

Commit

Permalink
Support tokenize SQL function (#24)
Browse files Browse the repository at this point in the history
* Support tokenize SQL function

* add unit test to verify tokenize function
  • Loading branch information
lu-zhengda authored Nov 6, 2023
1 parent 867b401 commit 5e11e41
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 5 deletions.
4 changes: 2 additions & 2 deletions normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMe
if n.config.CollectComments && (token.Type == COMMENT || token.Type == MULTILINE_COMMENT) {
// Collect comments
statementMetadata.Comments = append(statementMetadata.Comments, token.Value)
} else if token.Type == IDENT || token.Type == QUOTED_IDENT {
} else if token.Type == IDENT || token.Type == QUOTED_IDENT || token.Type == FUNCTION {
tokenVal := token.Value
if token.Type == QUOTED_IDENT {
// remove all open and close quotes
Expand Down Expand Up @@ -265,7 +265,7 @@ func (n *Normalizer) isObfuscatedValueGroupable(token *Token, lastToken *Token,

func (n *Normalizer) appendWhitespace(lastToken *Token, token *Token, normalizedSQLBuilder *strings.Builder) {
// do not add a space between parentheses if RemoveSpaceBetweenParentheses is true
if n.config.RemoveSpaceBetweenParentheses && (lastToken.Value == "(" || lastToken.Value == "[") {
if n.config.RemoveSpaceBetweenParentheses && (lastToken.Type == FUNCTION || lastToken.Value == "(" || lastToken.Value == "[") {
return
}

Expand Down
4 changes: 2 additions & 2 deletions normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -946,10 +946,10 @@ func TestNormalizerWithoutSpaceBetweenParentheses(t *testing.T) {
}{
{
input: "SELECT count(*) FROM users",
expected: "SELECT count (*) FROM users",
expected: "SELECT count(*) FROM users",
},
{
input: "SELECT * FROM users WHERE id IN(?, ?)",
input: "SELECT * FROM users WHERE id IN (?, ?)",
expected: "SELECT * FROM users WHERE id IN (?)",
},
{
Expand Down
6 changes: 5 additions & 1 deletion sqllexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
DOLLAR_QUOTED_STRING // dollar quoted string
POSITIONAL_PARAMETER // numbered parameter
BIND_PARAMETER // bind parameter
FUNCTION // function
UNKNOWN // unknown token
)

Expand Down Expand Up @@ -305,7 +306,10 @@ func (s *Lexer) scanIdentifier(ch rune) Token {
for isLetter(ch) || isDigit(ch) || ch == '.' || ch == '?' || ch == '$' || ch == '#' {
ch = s.nextBy(utf8.RuneLen(ch))
}
// NOTE(review): stale comment — the token value below is returned as scanned
// (no uppercasing happens here); case-insensitive matching, if any, is done elsewhere
if ch == '(' {
// if the identifier is followed by a (, then it's a function
return Token{FUNCTION, s.src[s.start:s.cursor]}
}
return Token{IDENT, s.src[s.start:s.cursor]}
}

Expand Down
16 changes: 16 additions & 0 deletions sqllexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,22 @@ func TestLexer(t *testing.T) {
},
lexerOpts: []lexerOption{WithDBMS(DBMSMySQL)},
},
{
name: "Tokenize function",
input: "SELECT count(*) FROM users",
expected: []Token{
{IDENT, "SELECT"},
{WS, " "},
{FUNCTION, "count"},
{PUNCTUATION, "("},
{WILDCARD, "*"},
{PUNCTUATION, ")"},
{WS, " "},
{IDENT, "FROM"},
{WS, " "},
{IDENT, "users"},
},
},
}

for _, tt := range tests {
Expand Down

0 comments on commit 5e11e41

Please sign in to comment.