
Commit
obfuscate: account for bracketed identifiers as AS parameters
gbbr committed Sep 24, 2018
1 parent ea7b315 commit febb9a2
Showing 3 changed files with 36 additions and 1 deletion.
21 changes: 20 additions & 1 deletion obfuscate/sql.go
@@ -31,7 +31,24 @@ type DiscardFilter struct{}
 func (f *DiscardFilter) Filter(token, lastToken int, buffer []byte) (int, []byte) {
 	// filters based on previous token
 	switch lastToken {
+	case FilteredBracketedIdentifier:
+		if token != ']' {
+			// we haven't found the closing bracket yet, keep going
+			if token != ID {
+				// the token between the brackets *must* be an identifier,
+				// otherwise the query is invalid.
+				return LexError, nil
+			}
+			return FilteredBracketedIdentifier, nil
+		}
+		fallthrough
 	case As:
+		if token == '[' {
+			// the identifier following AS is an MSSQL bracketed identifier
+			// and will continue to be discarded until we find the corresponding
+			// closing bracket counterpart. See GitHub issue #475.
+			return FilteredBracketedIdentifier, nil
+		}
 		// prevent the next comma from being part of a GroupingFilter
 		return FilteredComma, nil
 	}
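The new branch turns DiscardFilter into a small state machine: an opening bracket seen right after AS switches the filter into the FilteredBracketedIdentifier state, identifier tokens keep it there, the matching closing bracket falls through into the As case (so the bracket itself is discarded and the comma guard is re-armed), and any other token is rejected as a LexError. Below is a minimal, runnable sketch of that walk; the ID, As, and LexError values are hypothetical stand-ins, and only the FilteredComma and FilteredBracketedIdentifier values come from sql_tokenizer.go.

package main

import "fmt"

// Hypothetical stand-ins for the obfuscate package's token constants;
// only 57366 and 57367 are taken from this commit, the rest are made
// up for illustration.
const (
	ID                          = 57346
	As                          = 57365
	FilteredComma               = 57366
	FilteredBracketedIdentifier = 57367
	LexError                    = 57300
)

// filter mirrors the lastToken-driven branch added to DiscardFilter.Filter.
func filter(token, lastToken int) int {
	switch lastToken {
	case FilteredBracketedIdentifier:
		if token != ']' {
			if token != ID {
				return LexError // only an identifier may sit between the brackets
			}
			return FilteredBracketedIdentifier // still inside [...], keep discarding
		}
		fallthrough
	case As:
		if token == '[' {
			return FilteredBracketedIdentifier // start of an MSSQL bracketed alias
		}
		return FilteredComma
	}
	return token
}

func main() {
	// Token stream for the alias in `... AS [b] ...`.
	state := As
	for _, tok := range []int{'[', ID, ']'} {
		state = filter(tok, state)
		fmt.Println(state) // prints 57367, 57367, 57366: the whole alias is discarded
	}
}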
@@ -150,7 +167,9 @@ func (t *TokenConsumer) Process(in string) (string, error) {

 	// apply all registered filters
 	for _, f := range t.filters {
-		token, buff = f.Filter(token, t.lastToken, buff)
+		if token, buff = f.Filter(token, t.lastToken, buff); token == LexError {
+			return "", errors.New("the tokenizer was unable to process the string")
+		}
 	}

 	// write the resulting buffer
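With the LexError check in place, Process now aborts rather than emitting a partially filtered query. A hedged usage sketch of the new failure path, assuming a NewTokenConsumer constructor and a TokenFilter slice for the wiring (neither appears in this diff; only TokenConsumer.Process and the error message do):

	// NewTokenConsumer and TokenFilter are assumed names for the
	// package's plumbing; Process and the error text are from the diff.
	consumer := NewTokenConsumer([]TokenFilter{&DiscardFilter{}})

	// The unterminated `[b` alias can no longer be tokenized (see the
	// second test case below), so Process surfaces an error instead of
	// returning a half-obfuscated string.
	out, err := consumer.Process(`SELECT [b].[BlogId] FROM [Blogs] AS [b ORDER BY [b].[Name]`)
	if err != nil {
		fmt.Println(err) // "the tokenizer was unable to process the string"
		return
	}
	fmt.Println(out)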
12 changes: 12 additions & 0 deletions obfuscate/sql_test.go
@@ -302,6 +302,18 @@ func TestSQLQuantizer(t *testing.T) {
 ` + "(@runtot := @runtot + daily_values.value) AS total FROM (SELECT @runtot:=0) AS n, `daily_values` WHERE `daily_values`.`subject_id` = 12345 AND `daily_values`.`subject_type` = 'Skippity' AND (daily_values.date BETWEEN '2018-05-09' AND '2018-06-19') HAVING value >= 0 ORDER BY date",
 			`SELECT daily_values.*, LEAST ( ( ? - @runtot ), value ), ( @runtot := @runtot + daily_values.value ) FROM ( SELECT @runtot := ? ), daily_values WHERE daily_values . subject_id = ? AND daily_values . subject_type = ? AND ( daily_values.date BETWEEN ? AND ? ) HAVING value >= ? ORDER BY date`,
 		},
+		{
+			`SELECT [b].[BlogId], [b].[Name]
+FROM [Blogs] AS [b]
+ORDER BY [b].[Name]`,
+			`SELECT [ b ] . [ BlogId ], [ b ] . [ Name ] FROM [ Blogs ] ORDER BY [ b ] . [ Name ]`,
+		},
+		{
+			`SELECT [b].[BlogId], [b].[Name]
+FROM [Blogs] AS [b
+ORDER BY [b].[Name]`,
+			`Non-parsable SQL query`,
+		},
 	}

 	for _, c := range cases {
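The second new case feeds an unterminated bracketed alias ([b is never closed), and its expected value is the literal placeholder rather than SQL, which implies the test helper swaps in a fixed string whenever Process returns an error. A sketch of that mapping, with assumed names for the case struct fields and the assertion (only the placeholder text comes from this diff):

	out, err := consumer.Process(c.query)
	if err != nil {
		// hypothetical mapping: the helper replaces the error with a
		// fixed placeholder so the table can assert on plain strings
		out = "Non-parsable SQL query"
	}
	assert.Equal(t, c.expected, out)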
4 changes: 4 additions & 0 deletions obfuscate/sql_tokenizer.go
@@ -44,6 +44,10 @@ const (
 	// FilteredComma specifies that the token is a comma and was discarded by one
 	// of the filters.
 	FilteredComma = 57366
+
+	// FilteredBracketedIdentifier specifies that we are currently discarding
+	// a bracketed identifier (MSSQL). See issue https://github.com/DataDog/datadog-trace-agent/issues/475.
+	FilteredBracketedIdentifier = 57367
 )

 // Tokenizer is the struct used to generate SQL
