Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

do not collect CTEs as tables #37

Merged
merged 1 commit into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,14 @@ func (n *Normalizer) Normalize(input string, lexerOpts ...lexerOption) (normaliz
var lastToken Token // The last token that is not whitespace or comment
var groupablePlaceholder groupablePlaceholder

ctes := make(map[string]bool) // Holds the CTEs that are currently being processed

for {
token := lexer.Scan()
if token.Type == EOF {
break
}
n.collectMetadata(&token, &lastToken, statementMetadata)
n.collectMetadata(&token, &lastToken, statementMetadata, ctes)
n.normalizeSQL(&token, &lastToken, &normalizedSQLBuilder, &groupablePlaceholder, lexerOpts...)
}

Expand All @@ -158,7 +160,7 @@ func (n *Normalizer) Normalize(input string, lexerOpts ...lexerOption) (normaliz
return n.trimNormalizedSQL(normalizedSQL), statementMetadata, nil
}

func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMetadata *StatementMetadata) {
func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMetadata *StatementMetadata, ctes map[string]bool) {
if n.config.CollectComments && (token.Type == COMMENT || token.Type == MULTILINE_COMMENT) {
// Collect comments
statementMetadata.Comments = append(statementMetadata.Comments, token.Value)
Expand All @@ -175,9 +177,14 @@ func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMe
if n.config.CollectCommands && isCommand(strings.ToUpper(tokenVal)) {
// Collect commands
statementMetadata.Commands = append(statementMetadata.Commands, strings.ToUpper(tokenVal))
} else if strings.ToUpper(lastToken.Value) == "WITH" && token.Type == IDENT {
// Collect CTEs so we can skip them later in table collection
ctes[tokenVal] = true
} else if n.config.CollectTables && isTableIndicator(strings.ToUpper(lastToken.Value)) && !isSQLKeyword(token) {
// Collect table names
statementMetadata.Tables = append(statementMetadata.Tables, tokenVal)
// Collect table names only when the token is not a known CTE
if _, ok := ctes[tokenVal]; !ok {
statementMetadata.Tables = append(statementMetadata.Tables, tokenVal)
}
} else if n.config.CollectProcedure && isProcedure(lastToken) {
// Collect procedure names
statementMetadata.Procedures = append(statementMetadata.Procedures, tokenVal)
Expand Down
12 changes: 6 additions & 6 deletions normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,22 +200,22 @@ multiline comment */
`,
expected: "WITH cte AS ( SELECT id, name, age FROM person WHERE age > ? ) UPDATE person SET age = ? WHERE id IN ( SELECT id FROM cte ); INSERT INTO person ( name, age ) SELECT name, ? FROM cte WHERE age <= ?",
statementMetadata: StatementMetadata{
Tables: []string{"person", "cte"},
Tables: []string{"person"},
Comments: []string{},
Commands: []string{"SELECT", "UPDATE", "INSERT"},
Procedures: []string{},
Size: 27,
Size: 24,
},
},
{
input: "WITH updates AS ( UPDATE metrics_metadata SET metric_type = ? updated = ? :: timestamp, interval = ? unit_id = ? per_unit_id = ? description = ? orientation = ? integration = ? short_name = ? WHERE metric_key = ? AND org_id = ? RETURNING ? ) INSERT INTO metrics_metadata ( org_id, metric_key, metric_type, interval, unit_id, per_unit_id, description, orientation, integration, short_name ) SELECT ? WHERE NOT EXISTS ( SELECT ? FROM updates )",
expected: "WITH updates AS ( UPDATE metrics_metadata SET metric_type = ? updated = ? :: timestamp, interval = ? unit_id = ? per_unit_id = ? description = ? orientation = ? integration = ? short_name = ? WHERE metric_key = ? AND org_id = ? RETURNING ? ) INSERT INTO metrics_metadata ( org_id, metric_key, metric_type, interval, unit_id, per_unit_id, description, orientation, integration, short_name ) SELECT ? WHERE NOT EXISTS ( SELECT ? FROM updates )",
statementMetadata: StatementMetadata{
Tables: []string{"metrics_metadata", "updates"},
Tables: []string{"metrics_metadata"},
Comments: []string{},
Commands: []string{"UPDATE", "INSERT", "SELECT"},
Procedures: []string{},
Size: 41,
Size: 34,
},
},
{
Expand Down Expand Up @@ -283,11 +283,11 @@ multiline comment */
input: "/* Testing explicit table SQL expression */ WITH T1 AS (SELECT PNO , PNAME , COLOR , WEIGHT , CITY FROM P WHERE CITY = ?), T2 AS (SELECT PNO, PNAME, COLOR, WEIGHT, CITY, ? * WEIGHT AS NEW_WEIGHT, ? AS NEW_CITY FROM T1), T3 AS ( SELECT PNO , PNAME, COLOR, NEW_WEIGHT AS WEIGHT, NEW_CITY AS CITY FROM T2), T4 AS ( TABLE P EXCEPT CORRESPONDING TABLE T1) TABLE T4 UNION CORRESPONDING TABLE T3",
expected: "WITH T1 AS ( SELECT PNO, PNAME, COLOR, WEIGHT, CITY FROM P WHERE CITY = ? ), T2 AS ( SELECT PNO, PNAME, COLOR, WEIGHT, CITY, ? * WEIGHT, ? FROM T1 ), T3 AS ( SELECT PNO, PNAME, COLOR, NEW_WEIGHT, NEW_CITY FROM T2 ), T4 AS ( TABLE P EXCEPT CORRESPONDING TABLE T1 ) TABLE T4 UNION CORRESPONDING TABLE T3",
statementMetadata: StatementMetadata{
Tables: []string{"P", "T1", "T2", "T4", "T3"},
Tables: []string{"P", "T2", "T4", "T3"},
Comments: []string{"/* Testing explicit table SQL expression */"},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 58,
Size: 56,
},
},
{
Expand Down
4 changes: 3 additions & 1 deletion obfuscate_and_normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ func ObfuscateAndNormalize(input string, obfuscator *Obfuscator, normalizer *Nor
var lastToken Token // The last token that is not whitespace or comment
var groupablePlaceholder groupablePlaceholder

ctes := make(map[string]bool) // Holds the CTEs that are currently being processed

for {
token := lexer.Scan()
if token.Type == EOF {
break
}
token.Value = obfuscator.ObfuscateTokenValue(token, lexerOpts...)
normalizer.collectMetadata(&token, &lastToken, statementMetadata)
normalizer.collectMetadata(&token, &lastToken, statementMetadata, ctes)
normalizer.normalizeSQL(&token, &lastToken, &normalizedSQLBuilder, &groupablePlaceholder, lexerOpts...)
}

Expand Down
4 changes: 2 additions & 2 deletions obfuscate_and_normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,11 +308,11 @@ multiline comment */
SELECT * FROM SILENCES WHERE ROW_NUMBER = 1;`,
expected: `WITH SILENCES AS ( SELECT LOWER ( BASE_TABLE_NAME ), CREATED_DT, SILENCE_UNTIL_DT, REASON, ROW_NUMBER ( ) OVER ( PARTITION BY LOWER ( BASE_TABLE_NAME ) ORDER BY CREATED_DT DESC ) FROM REPORTING.GENERAL.SOME_TABLE WHERE CONTAINS ( ?, LOWER ( DATACENTER_LABEL ) ) ) SELECT * FROM SILENCES WHERE ROW_NUMBER = ?`,
statementMetadata: StatementMetadata{
Tables: []string{"REPORTING.GENERAL.SOME_TABLE", "SILENCES"},
Tables: []string{"REPORTING.GENERAL.SOME_TABLE"},
Comments: []string{},
Commands: []string{"SELECT"},
Procedures: []string{},
Size: 42,
Size: 34,
},
lexerOpts: []lexerOption{
WithDBMS(DBMSSnowflake),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
{
"expected": "WITH ComplexCTE AS ( SELECT t?.id, t?.amount, ROW_NUMBER ( ) OVER ( PARTITION BY t?.customer_id ORDER BY t?.amount DESC ) FROM ( SELECT id, customer_id, status FROM orders WHERE YEAR ( order_date ) = YEAR ( GETDATE ( ) ) AND status NOT IN ( ? ) ) t? INNER JOIN ( SELECT order_id, SUM ( amount ) FROM order_details GROUP BY order_id ) t? ON t?.id = t?.order_id WHERE t?.amount > ? ), SecondCTE AS ( SELECT c?. *, c?.name, c?.region FROM ComplexCTE c? INNER JOIN customers c? ON c?.customer_id = c?.id WHERE c?.region IN ( ? ) AND c?.rn < ? ) SELECT s.id, s.name, s.amount, p.product_name, CASE WHEN s.amount > ? THEN ? ELSE ? END FROM SecondCTE s LEFT JOIN ( SELECT DISTINCT p?.order_id, p?.product_name FROM order_products p? INNER JOIN products p? ON p?.product_id = p?.id ) p ON s.id = p.order_id WHERE s.region = ? AND s.status LIKE ? ORDER BY s.amount DESC, s.name",
"statement_metadata": {
"size": 79,
"tables": ["orders", "order_details", "ComplexCTE", "customers", "SecondCTE", "order_products", "products"],
"size": 69,
"tables": ["orders", "order_details", "customers", "SecondCTE", "order_products", "products"],
"commands": ["SELECT", "JOIN"],
"comments": [],
"procedures": []
Expand Down
4 changes: 2 additions & 2 deletions testdata/mssql/delete/delete-with-cte.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
{
"expected": "WITH OldOrders AS ( SELECT id FROM orders WHERE order_date < ? ) DELETE FROM orders WHERE id IN ( SELECT id FROM OldOrders )",
"statement_metadata": {
"size": 27,
"tables": ["orders", "OldOrders"],
"size": 18,
"tables": ["orders"],
"commands": ["SELECT", "DELETE"],
"comments": [],
"procedures": []
Expand Down
4 changes: 2 additions & 2 deletions testdata/mssql/select/select-with-cte.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
{
"expected": "WITH RankedOrders AS ( SELECT o.id, o.customer_id, RANK ( ) OVER ( PARTITION BY o.customer_id ORDER BY o.amount DESC ) FROM orders o ) SELECT id FROM RankedOrders WHERE rnk = ?",
"statement_metadata": {
"size": 24,
"tables": ["orders", "RankedOrders"],
"size": 12,
"tables": ["orders"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
Expand Down
4 changes: 2 additions & 2 deletions testdata/mssql/update/update-with-cte.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
{
"expected": "WITH UpdatedOrders AS ( SELECT id FROM orders WHERE order_date < GETDATE ( ) - ? ) UPDATE o SET o.status = ? FROM orders o JOIN UpdatedOrders uo ON o.id = uo.id",
"statement_metadata": {
"size": 36,
"tables": ["orders", "o", "UpdatedOrders"],
"size": 23,
"tables": ["orders", "o"],
"commands": ["SELECT", "UPDATE", "JOIN"],
"comments": [],
"procedures": []
Expand Down
4 changes: 2 additions & 2 deletions testdata/oracle/complex/super-complex-oracle-query.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
{
"expected": "WITH ranked_sales AS ( SELECT product_id, SUM ( amount ), RANK ( ) OVER ( ORDER BY SUM ( amount ) DESC ) sales_rank FROM sales GROUP BY product_id ), dept_costs AS ( SELECT department_id, SUM ( test_amt ) FROM employees GROUP BY department_id ), latest_transactions AS ( SELECT t.account_id, t.amount, ROW_NUMBER ( ) OVER ( PARTITION BY t.account_id ORDER BY t.transaction_date DESC ) rn FROM transactions t WHERE t.transaction_date >= ADD_MONTHS ( SYSDATE, ? ) ) SELECT e.employee_id, e.last_name, e.test_amt, d.department_name, d.location_id, rs.total_sales, rs.sales_rank, lt.amount FROM employees e INNER JOIN departments d ON e.department_id = d.id LEFT JOIN ranked_sales rs ON e.product_id = rs.product_id LEFT JOIN latest_transactions lt ON e.account_id = lt.account_id AND lt.rn = ? WHERE e.hire_date > ? AND ( d.budget > ( SELECT AVG ( total_sal ) FROM dept_costs ) OR e.test_amt > ( SELECT AVG ( test_amt ) FROM employees WHERE department_id = e.department_id ) ) AND EXISTS ( SELECT ? FROM customer_orders co WHERE co.employee_id = e.employee_id AND co.order_status = ? ) ORDER BY e.department_id, e.test_amt DESC",
"statement_metadata": {
"size": 103,
"tables": ["sales", "employees", "transactions", "departments", "ranked_sales", "latest_transactions", "dept_costs", "customer_orders"],
"size": 91,
"tables": ["sales", "employees", "transactions", "departments", "latest_transactions", "dept_costs", "customer_orders"],
"commands": ["SELECT", "JOIN"],
"comments": [],
"procedures": []
Expand Down
4 changes: 2 additions & 2 deletions testdata/oracle/select/select-using-with-clause.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
{
"expected": "WITH dept_costs AS ( SELECT department_id, SUM ( test_amt ) FROM employees GROUP BY department_id ) SELECT * FROM dept_costs WHERE total_sal > ( SELECT AVG ( total_sal ) FROM dept_costs )",
"statement_metadata": {
"size": 25,
"tables": ["employees", "dept_costs"],
"size": 15,
"tables": ["employees"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
Expand Down
5 changes: 2 additions & 3 deletions testdata/postgresql/delete/delete-with-cte.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
{
"expected": "WITH deleted AS ( DELETE FROM users WHERE last_login < NOW ( ) - INTERVAL ? RETURNING * ) SELECT * FROM deleted",
"statement_metadata": {
"size": 24,
"size": 17,
"tables": [
"users",
"deleted"
"users"
],
"commands": [
"DELETE",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
{
"expected": "WITH recursive_subordinates AS ( SELECT id, manager_id FROM employees WHERE id = ? UNION ALL SELECT e.id, e.manager_id FROM employees e INNER JOIN recursive_subordinates rs ON rs.id = e.manager_id ) SELECT * FROM recursive_subordinates",
"statement_metadata": {
"size": 41,
"size": 19,
"tables": [
"employees",
"recursive_subordinates"
"employees"
],
"commands": [
"SELECT",
Expand Down
5 changes: 2 additions & 3 deletions testdata/postgresql/update/update-with-cte.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
{
"expected": "WITH updated AS ( UPDATE users SET name = ? WHERE id = ? RETURNING * ) SELECT * FROM updated",
"statement_metadata": {
"size": 24,
"size": 17,
"tables": [
"users",
"updated"
"users"
],
"commands": [
"UPDATE",
Expand Down
Loading