diff --git a/dbms_test.go b/dbms_test.go index 2ab1516..bbecee5 100644 --- a/dbms_test.go +++ b/dbms_test.go @@ -100,6 +100,7 @@ func TestQueriesPerDBMS(t *testing.T) { UppercaseKeywords: false, RemoveSpaceBetweenParentheses: false, KeepTrailingSemicolon: false, + KeepIdentifierQuotation: false, } } @@ -120,6 +121,7 @@ func TestQueriesPerDBMS(t *testing.T) { WithUppercaseKeywords(defaultNormalizerConfig.UppercaseKeywords), WithRemoveSpaceBetweenParentheses(defaultNormalizerConfig.RemoveSpaceBetweenParentheses), WithKeepTrailingSemicolon(defaultNormalizerConfig.KeepTrailingSemicolon), + WithKeepIdentifierQuotation(defaultNormalizerConfig.KeepIdentifierQuotation), ) got, statementMetadata, err := ObfuscateAndNormalize(string(tt.Input), obfuscator, normalizer, WithDBMS(dbms)) diff --git a/normalizer.go b/normalizer.go index fc1bafa..fe437e0 100644 --- a/normalizer.go +++ b/normalizer.go @@ -31,6 +31,9 @@ type normalizerConfig struct { // The trailing semicolon is removed by default, but this can be disabled by setting this to true. // PL/SQL requires a trailing semicolon, so this should be set to true when normalizing PL/SQL. KeepTrailingSemicolon bool `json:"keep_trailing_semicolon"` + + // KeepIdentifierQuotation specifies whether the normalizer should keep the quotation of identifiers. + KeepIdentifierQuotation bool `json:"keep_identifier_quotation"` } type normalizerOption func(*normalizerConfig) @@ -83,6 +86,12 @@ func WithKeepTrailingSemicolon(keepTrailingSemicolon bool) normalizerOption { } } +func WithKeepIdentifierQuotation(keepIdentifierQuotation bool) normalizerOption { + return func(c *normalizerConfig) { + c.KeepIdentifierQuotation = keepIdentifierQuotation + } +} + type StatementMetadata struct { Size int `json:"size"` Tables []string `json:"tables"` @@ -156,8 +165,12 @@ func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMe } else if token.Type == IDENT || token.Type == QUOTED_IDENT || token.Type == FUNCTION { tokenVal := token.Value if token.Type == QUOTED_IDENT { - // remove all open and close quotes + // We always want to trim the quotes for collected metadata such as table names + // This is because the metadata is used as tags, and we don't want them to be normalized as underscores later on tokenVal = trimQuotes(tokenVal, tokenVal[0:1], tokenVal[len(tokenVal)-1:]) + if !n.config.KeepIdentifierQuotation { + token.Value = tokenVal + } } if n.config.CollectCommands && isCommand(strings.ToUpper(tokenVal)) { // Collect commands diff --git a/normalizer_test.go b/normalizer_test.go index e28a347..77df919 100644 --- a/normalizer_test.go +++ b/normalizer_test.go @@ -183,30 +183,6 @@ multiline comment */ Size: 24, }, }, - { - // double quoted table name - input: `SELECT * FROM "users" WHERE id = ?`, - expected: `SELECT * FROM "users" WHERE id = ?`, - statementMetadata: StatementMetadata{ - Tables: []string{`users`}, - Comments: []string{}, - Commands: []string{"SELECT"}, - Procedures: []string{}, - Size: 11, - }, - }, - { - // double quoted table name - input: `SELECT * FROM "public"."users" WHERE id = ?`, - expected: `SELECT * FROM "public"."users" WHERE id = ?`, - statementMetadata: StatementMetadata{ - Tables: []string{`public.users`}, - Comments: []string{}, - Commands: []string{"SELECT"}, - Procedures: []string{}, - Size: 18, - }, - }, { input: ` WITH cte AS ( @@ -609,6 +585,7 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { expected string statementMetadata StatementMetadata normalizationConfig *normalizerConfig + lexerOptions []lexerOption }{ { input: "SELECT id,name, address FROM users where id = 1", @@ -736,6 +713,158 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { KeepSQLAlias: true, }, }, + { + input: `SELECT * FROM "users" WHERE id = ?`, + expected: `SELECT * FROM users WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 11, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + }, + }, + { + input: `SELECT * FROM "users" WHERE id = ?`, + expected: `SELECT * FROM "users" WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 11, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + KeepIdentifierQuotation: true, + }, + }, + { + input: `SELECT * FROM "public"."users" WHERE id = ?`, + expected: `SELECT * FROM public.users WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`public.users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 18, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + }, + }, + { + input: `SELECT * FROM "public"."users" WHERE id = ?`, + expected: `SELECT * FROM "public"."users" WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`public.users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 18, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + KeepIdentifierQuotation: true, + }, + }, + { + input: "SELECT * FROM `public`.`users` WHERE id = ?", + expected: `SELECT * FROM public.users WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`public.users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 18, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + }, + lexerOptions: []lexerOption{ + WithDBMS(DBMSMySQL), + }, + }, + { + input: "SELECT * FROM `public`.`users` WHERE id = ?", + expected: "SELECT * FROM `public`.`users` WHERE id = ?", + statementMetadata: StatementMetadata{ + Tables: []string{`public.users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 18, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + KeepIdentifierQuotation: true, + }, + lexerOptions: []lexerOption{ + WithDBMS(DBMSMySQL), + }, + }, + { + input: `SELECT * FROM [public].[users] WHERE id = ?`, + expected: `SELECT * FROM public.users WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`public.users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 18, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + }, + lexerOptions: []lexerOption{ + WithDBMS(DBMSSQLServer), + }, + }, + { + input: `SELECT * FROM [public].[users] WHERE id = ?`, + expected: `SELECT * FROM [public].[users] WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{`public.users`}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 18, + }, + normalizationConfig: &normalizerConfig{ + CollectComments: true, + CollectCommands: true, + CollectTables: true, + KeepSQLAlias: true, + KeepIdentifierQuotation: true, + }, + lexerOptions: []lexerOption{ + WithDBMS(DBMSSQLServer), + }, + }, } for _, test := range tests { @@ -745,8 +874,9 @@ func TestNormalizeDeobfuscatedSQL(t *testing.T) { WithCollectCommands(test.normalizationConfig.CollectCommands), WithCollectTables(test.normalizationConfig.CollectTables), WithKeepSQLAlias(test.normalizationConfig.KeepSQLAlias), + WithKeepIdentifierQuotation(test.normalizationConfig.KeepIdentifierQuotation), ) - got, statementMetadata, err := normalizer.Normalize(test.input) + got, statementMetadata, err := normalizer.Normalize(test.input, test.lexerOptions...) assert.NoError(t, err) assert.Equal(t, test.expected, got) assert.Equal(t, &test.statementMetadata, statementMetadata) diff --git a/obfuscate_and_normalize_test.go b/obfuscate_and_normalize_test.go index 56fda7c..4720d7c 100644 --- a/obfuscate_and_normalize_test.go +++ b/obfuscate_and_normalize_test.go @@ -171,7 +171,7 @@ multiline comment */ { // double quoted table name input: `SELECT * FROM "public"."users" WHERE id = 1`, - expected: `SELECT * FROM "public"."users" WHERE id = ?`, + expected: `SELECT * FROM public.users WHERE id = ?`, statementMetadata: StatementMetadata{ Tables: []string{"public.users"}, Comments: []string{}, @@ -183,7 +183,7 @@ multiline comment */ { // [] quoted table name input: `SELECT * FROM [public].[users] WHERE id = 1`, - expected: `SELECT * FROM [public].[users] WHERE id = ?`, + expected: `SELECT * FROM public.users WHERE id = ?`, statementMetadata: StatementMetadata{ Tables: []string{"public.users"}, Comments: []string{}, @@ -230,7 +230,7 @@ multiline comment */ }, { input: `select "user_id" from "public"."users"`, - expected: `select "user_id" from "public"."users"`, + expected: `select user_id from public.users`, statementMetadata: StatementMetadata{ Tables: []string{`public.users`}, Comments: []string{}, @@ -254,7 +254,7 @@ multiline comment */ }, }, { - input: `SELECT * FROM users WHERE id = ? # this is a comment`, + input: `SELECT * FROM users WHERE id = 1 # this is a comment`, expected: `SELECT * FROM users WHERE id = ?`, statementMetadata: StatementMetadata{ Tables: []string{"users"}, @@ -267,6 +267,20 @@ multiline comment */ WithDBMS(DBMSMySQL), }, }, + { + input: `SELECT * FROM [世界].[测试] WHERE id = 1`, + expected: `SELECT * FROM 世界.测试 WHERE id = ?`, + statementMetadata: StatementMetadata{ + Tables: []string{"世界.测试"}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 19, + }, + lexerOpts: []lexerOption{ + WithDBMS(DBMSSQLServer), + }, + }, } obfuscator := NewObfuscator( diff --git a/testdata/oracle/select/quoted-identifiers-case-sensitive.json b/testdata/oracle/select/quoted-identifiers-case-sensitive.json new file mode 100644 index 0000000..bf49d9c --- /dev/null +++ b/testdata/oracle/select/quoted-identifiers-case-sensitive.json @@ -0,0 +1,22 @@ +{ + "input": "SELECT \"OrderId\", \"OrderDate\", \"CustomerName\" FROM \"Sales\".\"Orders\" WHERE \"OrderStatus\" = 'Shipped';", + "outputs": [ + { + "expected": "SELECT OrderId, OrderDate, CustomerName FROM Sales.Orders WHERE OrderStatus = ?", + "statement_metadata": { + "size": 18, + "tables": ["Sales.Orders"], + "commands": ["SELECT"], + "comments": [], + "procedures": [] + } + }, + { + "normalizer_config": { + "keep_identifier_quotation": true, + "Keep_trailing_semicolon": true + }, + "expected": "SELECT \"OrderId\", \"OrderDate\", \"CustomerName\" FROM \"Sales\".\"Orders\" WHERE \"OrderStatus\" = ?;" + } + ] + } \ No newline at end of file diff --git a/testdata/oracle/select/quoted-identifiers-special-characters.json b/testdata/oracle/select/quoted-identifiers-special-characters.json new file mode 100644 index 0000000..89ac3e5 --- /dev/null +++ b/testdata/oracle/select/quoted-identifiers-special-characters.json @@ -0,0 +1,22 @@ +{ + "input": "SELECT * FROM \"Sales\".\"Order-Details\" WHERE \"Product#Name\" LIKE '%Gadget%';", + "outputs": [ + { + "expected": "SELECT * FROM Sales.Order-Details WHERE Product#Name LIKE ?", + "statement_metadata": { + "size": 25, + "tables": ["Sales.Order-Details"], + "commands": ["SELECT"], + "comments": [], + "procedures": [] + } + }, + { + "normalizer_config": { + "keep_identifier_quotation": true, + "Keep_trailing_semicolon": true + }, + "expected": "SELECT * FROM \"Sales\".\"Order-Details\" WHERE \"Product#Name\" LIKE ?;" + } + ] + } \ No newline at end of file diff --git a/testdata/postgresql/select/quoted-identifiers-case-sensitive.json b/testdata/postgresql/select/quoted-identifiers-case-sensitive.json new file mode 100644 index 0000000..d9896d1 --- /dev/null +++ b/testdata/postgresql/select/quoted-identifiers-case-sensitive.json @@ -0,0 +1,21 @@ +{ + "input": "SELECT \"OrderId\", \"OrderDate\", \"CustomerName\" FROM \"Sales\".\"Orders\" WHERE \"OrderStatus\" = 'Shipped'", + "outputs": [ + { + "expected": "SELECT OrderId, OrderDate, CustomerName FROM Sales.Orders WHERE OrderStatus = ?", + "statement_metadata": { + "size": 18, + "tables": ["Sales.Orders"], + "commands": ["SELECT"], + "comments": [], + "procedures": [] + } + }, + { + "normalizer_config": { + "keep_identifier_quotation": true + }, + "expected": "SELECT \"OrderId\", \"OrderDate\", \"CustomerName\" FROM \"Sales\".\"Orders\" WHERE \"OrderStatus\" = ?" + } + ] + } \ No newline at end of file diff --git a/testdata/postgresql/select/quoted-identifiers-special-characters.json b/testdata/postgresql/select/quoted-identifiers-special-characters.json new file mode 100644 index 0000000..e7203e6 --- /dev/null +++ b/testdata/postgresql/select/quoted-identifiers-special-characters.json @@ -0,0 +1,21 @@ +{ + "input": "SELECT * FROM \"Sales\".\"Order-Details\" WHERE \"Product#Name\" LIKE '%Gadget%'", + "outputs": [ + { + "expected": "SELECT * FROM Sales.Order-Details WHERE Product#Name LIKE ?", + "statement_metadata": { + "size": 25, + "tables": ["Sales.Order-Details"], + "commands": ["SELECT"], + "comments": [], + "procedures": [] + } + }, + { + "normalizer_config": { + "keep_identifier_quotation": true + }, + "expected": "SELECT * FROM \"Sales\".\"Order-Details\" WHERE \"Product#Name\" LIKE ?" + } + ] + } \ No newline at end of file