Skip to content

Commit

Permalink
Merge pull request #19 from DataDog/zhengda.lu/trim-quotes
Browse files Browse the repository at this point in the history
trim quotes for collected metadata
  • Loading branch information
lu-zhengda authored Oct 25, 2023
2 parents 0e67ff7 + a85490f commit 7c204fb
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 25 deletions.
15 changes: 10 additions & 5 deletions normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,21 @@ func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMe
if n.config.CollectComments && (token.Type == COMMENT || token.Type == MULTILINE_COMMENT) {
// Collect comments
statementMetadata.Comments = append(statementMetadata.Comments, token.Value)
} else if token.Type == IDENT {
if n.config.CollectCommands && isCommand(strings.ToUpper(token.Value)) {
} else if token.Type == IDENT || token.Type == QUOTED_IDENT {
tokenVal := token.Value
if token.Type == QUOTED_IDENT {
// remove all open and close quotes
tokenVal = trimQuotes(tokenVal, tokenVal[0:1], tokenVal[len(tokenVal)-1:])
}
if n.config.CollectCommands && isCommand(strings.ToUpper(tokenVal)) {
// Collect commands
statementMetadata.Commands = append(statementMetadata.Commands, strings.ToUpper(token.Value))
statementMetadata.Commands = append(statementMetadata.Commands, strings.ToUpper(tokenVal))
} else if n.config.CollectTables && isTableIndicator(strings.ToUpper(lastToken.Value)) {
// Collect table names
statementMetadata.Tables = append(statementMetadata.Tables, token.Value)
statementMetadata.Tables = append(statementMetadata.Tables, tokenVal)
} else if n.config.CollectProcedure && isProcedure(lastToken) {
// Collect procedure names
statementMetadata.Procedures = append(statementMetadata.Procedures, token.Value)
statementMetadata.Procedures = append(statementMetadata.Procedures, tokenVal)
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,23 +188,23 @@ multiline comment */
input: `SELECT * FROM "users" WHERE id = ?`,
expected: `SELECT * FROM "users" WHERE id = ?`,
statementMetadata: StatementMetadata{
Tables: []string{`"users"`},
Tables: []string{`users`},
Comments: []string{},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 13,
Size: 11,
},
},
{
// double quoted table name
input: `SELECT * FROM "public"."users" WHERE id = ?`,
expected: `SELECT * FROM "public"."users" WHERE id = ?`,
statementMetadata: StatementMetadata{
Tables: []string{`"public"."users"`},
Tables: []string{`public.users`},
Comments: []string{},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 22,
Size: 18,
},
},
{
Expand Down
19 changes: 15 additions & 4 deletions obfuscate_and_normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,23 +173,23 @@ multiline comment */
input: `SELECT * FROM "public"."users" WHERE id = 1`,
expected: `SELECT * FROM "public"."users" WHERE id = ?`,
statementMetadata: StatementMetadata{
Tables: []string{`"public"."users"`},
Tables: []string{"public.users"},
Comments: []string{},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 22,
Size: 18,
},
},
{
// [] quoted table name
input: `SELECT * FROM [public].[users] WHERE id = 1`,
expected: `SELECT * FROM [public].[users] WHERE id = ?`,
statementMetadata: StatementMetadata{
Tables: []string{"[public].[users]"},
Tables: []string{"public.users"},
Comments: []string{},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 22,
Size: 18,
},
lexerOpts: []lexerOption{
WithDBMS(DBMSSQLServer),
Expand Down Expand Up @@ -228,6 +228,17 @@ multiline comment */
Size: 11,
},
},
{
input: `select "user_id" from "public"."users"`,
expected: `select "user_id" from "public"."users"`,
statementMetadata: StatementMetadata{
Tables: []string{`public.users`},
Comments: []string{},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 18,
},
},
}

obfuscator := NewObfuscator(
Expand Down
3 changes: 2 additions & 1 deletion sqllexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const (
INCOMPLETE_STRING // incomplete string literal so that we can obfuscate it, e.g. 'abc
NUMBER // number literal
IDENT // identifier
QUOTED_IDENT // quoted identifier
OPERATOR // operator
WILDCARD // wildcard *
COMMENT // comment
Expand Down Expand Up @@ -335,7 +336,7 @@ func (s *Lexer) scanDoubleQuotedIdentifier(delimiter rune) Token {
ch = s.next()
}
s.next() // consume the closing quote
return Token{IDENT, s.src[s.start:s.cursor]}
return Token{QUOTED_IDENT, s.src[s.start:s.cursor]}
}

func (s *Lexer) scanWhitespace() Token {
Expand Down
22 changes: 11 additions & 11 deletions sqllexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func TestLexer(t *testing.T) {
{WS, " "},
{IDENT, "FROM"},
{WS, " "},
{IDENT, "\"users table\""},
{QUOTED_IDENT, "\"users table\""},
{WS, " "},
{IDENT, "where"},
{WS, " "},
Expand Down Expand Up @@ -413,7 +413,7 @@ func TestLexer(t *testing.T) {
{WS, " "},
{IDENT, "FROM"},
{WS, " "},
{IDENT, `"users table"`},
{QUOTED_IDENT, `"users table"`},
},
},
{
Expand All @@ -426,7 +426,7 @@ func TestLexer(t *testing.T) {
{WS, " "},
{IDENT, "FROM"},
{WS, " "},
{IDENT, `"public"."users table"`},
{QUOTED_IDENT, `"public"."users table"`},
},
},
{
Expand Down Expand Up @@ -520,15 +520,15 @@ func TestLexer(t *testing.T) {
expected: []Token{
{IDENT, "SELECT"},
{WS, " "},
{IDENT, "[user]"},
{QUOTED_IDENT, "[user]"},
{WS, " "},
{IDENT, "FROM"},
{WS, " "},
{IDENT, "[test].[table]"},
{QUOTED_IDENT, "[test].[table]"},
{WS, " "},
{IDENT, "WHERE"},
{WS, " "},
{IDENT, "[id]"},
{QUOTED_IDENT, "[id]"},
{WS, " "},
{OPERATOR, "="},
{WS, " "},
Expand All @@ -542,15 +542,15 @@ func TestLexer(t *testing.T) {
expected: []Token{
{IDENT, "SELECT"},
{WS, " "},
{IDENT, "`user`"},
{QUOTED_IDENT, "`user`"},
{WS, " "},
{IDENT, "FROM"},
{WS, " "},
{IDENT, "`test`.`table`"},
{QUOTED_IDENT, "`test`.`table`"},
{WS, " "},
{IDENT, "WHERE"},
{WS, " "},
{IDENT, "`id`"},
{QUOTED_IDENT, "`id`"},
{WS, " "},
{OPERATOR, "="},
{WS, " "},
Expand Down Expand Up @@ -614,7 +614,7 @@ func TestLexerUnicode(t *testing.T) {
{
input: `"über"`,
expected: []Token{
{IDENT, `"über"`},
{QUOTED_IDENT, `"über"`},
},
},
}
Expand All @@ -633,5 +633,5 @@ func ExampleLexer() {
lexer := New(query)
tokens := lexer.ScanAll()
fmt.Println(tokens)
// Output: [{6 SELECT} {2 } {8 *} {2 } {6 FROM} {2 } {6 users} {2 } {6 WHERE} {2 } {6 id} {2 } {7 =} {2 } {5 1}]
// Output: [{6 SELECT} {2 } {9 *} {2 } {6 FROM} {2 } {6 users} {2 } {6 WHERE} {2 } {6 id} {2 } {8 =} {2 } {5 1}]
}
5 changes: 5 additions & 0 deletions sqllexer_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,8 @@ func replaceDigits(input string, placeholder string) string {

return builder.String()
}

// trimQuotes strips every occurrence of delim and closingDelim from input and
// returns the result. The delimiters are removed wherever they appear, not
// only at the ends, so a qualified name such as `"public"."users"` becomes
// `public.users`. delim and closingDelim may be the same string.
func trimQuotes(input string, delim string, closingDelim string) string {
	return strings.NewReplacer(delim, "", closingDelim, "").Replace(input)
}

0 comments on commit 7c204fb

Please sign in to comment.