Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added option to remove spaces between parentheses #22

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 38 additions & 19 deletions normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ type normalizerConfig struct {
// CollectProcedure specifies whether the normalizer should extract and return procedure name as SQL metadata
CollectProcedure bool

// KeepSQLAlias reports whether SQL aliases ("AS") should be truncated.
// KeepSQLAlias specifies whether SQL aliases ("AS") should be truncated.
KeepSQLAlias bool

// UppercaseKeywords reports whether SQL keywords should be uppercased.
// UppercaseKeywords specifies whether SQL keywords should be uppercased.
UppercaseKeywords bool

// RemoveSpaceBetweenParentheses specifies whether spaces should be removed between parentheses.
// Spaces are inserted between parentheses by default, but this can be disabled by setting this to true.
RemoveSpaceBetweenParentheses bool
}

type normalizerOption func(*normalizerConfig)
Expand Down Expand Up @@ -62,6 +66,12 @@ func WithCollectProcedures(collectProcedure bool) normalizerOption {
}
}

// WithRemoveSpaceBetweenParentheses returns a normalizerOption that stores
// removeSpaceBetweenParentheses in the RemoveSpaceBetweenParentheses field of
// the normalizerConfig the option is applied to.
func WithRemoveSpaceBetweenParentheses(removeSpaceBetweenParentheses bool) normalizerOption {
	var opt normalizerOption = func(cfg *normalizerConfig) {
		cfg.RemoveSpaceBetweenParentheses = removeSpaceBetweenParentheses
	}
	return opt
}

type StatementMetadata struct {
Size int
Tables []string
Expand Down Expand Up @@ -189,7 +199,7 @@ func (n *Normalizer) normalizeSQL(token *Token, lastToken *Token, normalizedSQLB
// if the last token is AS and the current token is not IDENT,
// this could be a CTE like WITH ... AS (...),
// so we do not discard the current token
appendWhitespace(lastToken, token, normalizedSQLBuilder)
n.appendWhitespace(lastToken, token, normalizedSQLBuilder)
n.writeToken(lastToken, normalizedSQLBuilder)
}
}
Expand All @@ -203,7 +213,7 @@ func (n *Normalizer) normalizeSQL(token *Token, lastToken *Token, normalizedSQLB
}

// determine if we should add a whitespace
appendWhitespace(lastToken, token, normalizedSQLBuilder)
n.appendWhitespace(lastToken, token, normalizedSQLBuilder)
n.writeToken(token, normalizedSQLBuilder)

*lastToken = *token
Expand Down Expand Up @@ -242,6 +252,30 @@ func (n *Normalizer) isObfuscatedValueGroupable(token *Token, lastToken *Token,
return false
}

// appendWhitespace writes a single space to normalizedSQLBuilder ahead of
// token unless one of these rules suppresses it:
//   - RemoveSpaceBetweenParentheses is set and lastToken is "(" or "[";
//   - RemoveSpaceBetweenParentheses is set and token is ")" or "]";
//   - token is ",";
//   - token is "=" and lastToken is ":".
func (n *Normalizer) appendWhitespace(lastToken *Token, token *Token, normalizedSQLBuilder *strings.Builder) {
	if n.config.RemoveSpaceBetweenParentheses {
		switch {
		case lastToken.Value == "(", lastToken.Value == "[":
			// nothing goes right after an opening bracket
			return
		case token.Value == ")", token.Value == "]":
			// nothing goes right before a closing bracket
			return
		}
	}

	switch {
	case token.Value == ",":
		// commas attach directly to the preceding token
		return
	case token.Value == "=" && lastToken.Value == ":":
		// a colon came right before the equals sign, so keep them adjacent
		return
	}

	normalizedSQLBuilder.WriteString(" ")
}

func dedupeCollectedMetadata(metadata []string) (dedupedMetadata []string, size int) {
// Dedupe collected metadata
// e.g. [SELECT, JOIN, SELECT, JOIN] -> [SELECT, JOIN]
Expand All @@ -265,18 +299,3 @@ func dedupeStatementMetadata(info *StatementMetadata) {
info.Procedures, procedureSize = dedupeCollectedMetadata(info.Procedures)
info.Size += tablesSize + commentsSize + commandsSize + procedureSize
}

// appendWhitespace emits one space into normalizedSQLBuilder before token,
// except when token is "," or when token is "=" and lastToken is ":".
func appendWhitespace(lastToken *Token, token *Token, normalizedSQLBuilder *strings.Builder) {
	if token.Value == "," {
		// commas attach directly to the preceding token
		return
	}
	if token.Value == "=" && lastToken.Value == ":" {
		// a colon came right before the equals sign, so keep them adjacent
		return
	}
	normalizedSQLBuilder.WriteString(" ")
}
80 changes: 76 additions & 4 deletions normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -760,19 +760,19 @@ func TestGroupObfuscatedValues(t *testing.T) {
expected string
}{
{
input: "( ? )",
input: "(?)",
expected: "( ? )",
},
{
input: "(?, ?)",
input: "( ? )",
expected: "( ? )",
},
{
input: "( ?, ?, ? )",
input: "(?, ?)",
expected: "( ? )",
},
{
input: "( ? )",
input: "( ?, ?, ? )",
expected: "( ? )",
},
{
Expand Down Expand Up @@ -815,6 +815,46 @@ func TestGroupObfuscatedValues(t *testing.T) {
input: "ANY(?, ?)",
expected: "ANY ( ? )",
},
{
input: "(?)",
expected: "( ? )",
},
{
input: "( ? )",
expected: "( ? )",
},
{
input: "(?, ?)",
expected: "( ? )",
},
{
input: "( ?, ?, ? )",
expected: "( ? )",
},
{
input: "( ?, ? )",
expected: "( ? )",
},
{
input: "( ?,?)",
expected: "( ? )",
},
{
input: "[ ? ]",
expected: "[ ? ]",
},
{
input: "[?, ?]",
expected: "[ ? ]",
},
{
input: "[ ?, ?, ? ]",
expected: "[ ? ]",
},
{
input: "[ ? ]",
expected: "[ ? ]",
},
}

for _, test := range tests {
Expand Down Expand Up @@ -899,6 +939,38 @@ func TestNormalizerStoredProcedure(t *testing.T) {
}
}

// TestNormalizerWithoutSpaceBetweenParentheses runs Normalize on each input
// using a normalizer built with WithRemoveSpaceBetweenParentheses(true) and
// compares the returned SQL string against the expected text.
func TestNormalizerWithoutSpaceBetweenParentheses(t *testing.T) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this looks good but we might want to refactor the tests eventually to have test cases with input, expected and a set of options. That way, tests can be collocated for the same input but with different options and really clearly show the difference in expected result depending on those options.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that is on my list.

// Each case pairs a raw SQL input with the normalized text expected when
// the option is enabled.
tests := []struct {
input string
expected string
}{
{
input: "SELECT count(*) FROM users",
expected: "SELECT count (*) FROM users",
},
{
input: "SELECT * FROM users WHERE id IN(?, ?)",
expected: "SELECT * FROM users WHERE id IN (?)",
},
{
input: "INSERT INTO my_table (numbers) VALUES (array[1,2,3])",
expected: "INSERT INTO my_table (numbers) VALUES (array [1, 2, 3])",
},
{
input: "BEGIN dbms_output.enable (?); END",
expected: "BEGIN dbms_output.enable (?) ; END",
},
}

for _, test := range tests {
// Subtests are registered with an empty name.
t.Run("", func(t *testing.T) {
normalizer := NewNormalizer(WithRemoveSpaceBetweenParentheses(true))
// Only the first return value of Normalize is checked; the second and
// third are discarded.
got, _, _ := normalizer.Normalize(test.input)
assert.Equal(t, test.expected, got)
})
}
}

func ExampleNormalizer() {
normalizer := NewNormalizer(
WithCollectComments(true),
Expand Down