diff --git a/obfuscate/sql.go b/obfuscate/sql.go
index 208df94ba..2a858eff8 100644
--- a/obfuscate/sql.go
+++ b/obfuscate/sql.go
@@ -31,7 +31,24 @@ type DiscardFilter struct{}
 func (f *DiscardFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
 	// filters based on previous token
 	switch lastToken {
+	case FilteredBracketedIdentifier:
+		if token != ']' {
+			// we haven't found the closing bracket yet, keep going
+			if token != ID {
+				// the token between the brackets *must* be an identifier,
+				// otherwise the query is invalid.
+				return LexError, nil
+			}
+			return FilteredBracketedIdentifier, nil
+		}
+		fallthrough
 	case As:
+		if token == '[' {
+			// the identifier followed by AS is an MSSQL bracketed identifier
+			// and will continue to be discarded until we find the corresponding
+			// closing bracket counter-part. See GitHub issue #475.
+			return FilteredBracketedIdentifier, nil
+		}
 		// prevent the next comma from being part of a GroupingFilter
 		return FilteredComma, nil
 	}
@@ -150,7 +167,9 @@ func (t *TokenConsumer) Process(in string) (string, error) {
 
 		// apply all registered filters
 		for _, f := range t.filters {
-			token, buff = f.Filter(token, t.lastToken, buff)
+			if token, buff = f.Filter(token, t.lastToken, buff); token == LexError {
+				return "", errors.New("the tokenizer was unable to process the string")
+			}
 		}
 
 		// write the resulting buffer
diff --git a/obfuscate/sql_test.go b/obfuscate/sql_test.go
index 11d10d599..3b00d00b2 100644
--- a/obfuscate/sql_test.go
+++ b/obfuscate/sql_test.go
@@ -302,6 +302,18 @@ func TestSQLQuantizer(t *testing.T) {
 			` + "(@runtot := @runtot + daily_values.value) AS total FROM (SELECT @runtot:=0) AS n, `daily_values` WHERE `daily_values`.`subject_id` = 12345 AND `daily_values`.`subject_type` = 'Skippity' AND (daily_values.date BETWEEN '2018-05-09' AND '2018-06-19') HAVING value >= 0 ORDER BY date", `SELECT daily_values.*, LEAST ( ( ? - @runtot ), value ), ( @runtot := @runtot + daily_values.value ) FROM ( SELECT @runtot := ? 
 ), daily_values WHERE daily_values . subject_id = ? AND daily_values . subject_type = ? AND ( daily_values.date BETWEEN ? AND ? ) HAVING value >= ? ORDER BY date`,
 		},
+		{
+			`SELECT [b].[BlogId], [b].[Name]
+FROM [Blogs] AS [b]
+ORDER BY [b].[Name]`,
+			`SELECT [ b ] . [ BlogId ], [ b ] . [ Name ] FROM [ Blogs ] ORDER BY [ b ] . [ Name ]`,
+		},
+		{
+			`SELECT [b].[BlogId], [b].[Name]
+FROM [Blogs] AS [b
+ORDER BY [b].[Name]`,
+			`Non-parsable SQL query`,
+		},
 	}
 
 	for _, c := range cases {
diff --git a/obfuscate/sql_tokenizer.go b/obfuscate/sql_tokenizer.go
index cecc1eef2..486d9d6ae 100644
--- a/obfuscate/sql_tokenizer.go
+++ b/obfuscate/sql_tokenizer.go
@@ -44,6 +44,10 @@ const (
 	// FilteredComma specifies that the token is a comma and was discarded by one
 	// of the filters.
 	FilteredComma = 57366
+
+	// FilteredBracketedIdentifier specifies that we are currently discarding
+	// a bracketed identifier (MSSQL). See issue https://github.com/DataDog/datadog-trace-agent/issues/475.
+	FilteredBracketedIdentifier = 57367
 )
 
 // Tokenizer is the struct used to generate SQL