forked from opendistro-for-elasticsearch/sql
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Anonymize sensitive data in queries exposed to RestSqlAction logs (opendistro-for-elasticsearch#419)
* Remove sensitive data from queries for logging * Added rule to mask sensitive data from ES logs * Applied API in SQLUtils to rebuild query string from AST; replace data masks with anonymous words * Inlined log message; added doc for new rule (cherry picked from commit 45fa29c)
- Loading branch information
Showing
6 changed files
with
250 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
...amazon/opendistroforelasticsearch/sql/rewriter/identifier/AnonymizeSensitiveDataRule.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
package com.amazon.opendistroforelasticsearch.sql.rewriter.identifier; | ||
|
||
import com.alibaba.druid.sql.ast.expr.SQLBooleanExpr; | ||
import com.alibaba.druid.sql.ast.expr.SQLCharExpr; | ||
import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr; | ||
import com.alibaba.druid.sql.ast.expr.SQLIntegerExpr; | ||
import com.alibaba.druid.sql.ast.expr.SQLNumberExpr; | ||
import com.alibaba.druid.sql.ast.expr.SQLQueryExpr; | ||
import com.alibaba.druid.sql.ast.statement.SQLExprTableSource; | ||
import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlASTVisitorAdapter; | ||
import com.amazon.opendistroforelasticsearch.sql.rewriter.RewriteRule; | ||
|
||
/** | ||
* Rewrite rule to anonymize sensitive data in logging queries. | ||
* This rule replace the content of specific nodes (that might involve index data) in AST | ||
* to anonymous content. | ||
*/ | ||
public class AnonymizeSensitiveDataRule extends MySqlASTVisitorAdapter implements RewriteRule<SQLQueryExpr> { | ||
|
||
@Override | ||
public boolean visit(SQLIdentifierExpr identifierExpr) { | ||
if (identifierExpr.getParent() instanceof SQLExprTableSource) { | ||
identifierExpr.setName("table"); | ||
} else { | ||
identifierExpr.setName("identifier"); | ||
} | ||
return true; | ||
} | ||
|
||
@Override | ||
public boolean visit(SQLIntegerExpr integerExpr) { | ||
integerExpr.setNumber(0); | ||
return true; | ||
} | ||
|
||
@Override | ||
public boolean visit(SQLNumberExpr numberExpr) { | ||
numberExpr.setNumber(0); | ||
return true; | ||
} | ||
|
||
@Override | ||
public boolean visit(SQLCharExpr charExpr) { | ||
charExpr.setText("string_literal"); | ||
return true; | ||
} | ||
|
||
@Override | ||
public boolean visit(SQLBooleanExpr booleanExpr) { | ||
booleanExpr.setValue(false); | ||
return true; | ||
} | ||
|
||
@Override | ||
public boolean match(SQLQueryExpr expr) { | ||
return true; | ||
} | ||
|
||
@Override | ||
public void rewrite(SQLQueryExpr expr) { | ||
expr.accept(this); | ||
} | ||
} |
55 changes: 55 additions & 0 deletions
55
src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/QueryDataAnonymizer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* | ||
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
package com.amazon.opendistroforelasticsearch.sql.utils; | ||
|
||
import com.alibaba.druid.sql.SQLUtils; | ||
import com.alibaba.druid.sql.ast.expr.SQLQueryExpr; | ||
import com.amazon.opendistroforelasticsearch.sql.rewriter.identifier.AnonymizeSensitiveDataRule; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
|
||
import static com.amazon.opendistroforelasticsearch.sql.utils.Util.toSqlExpr; | ||
|
||
/** | ||
* Utility class to mask sensitive information in incoming SQL queries | ||
*/ | ||
public class QueryDataAnonymizer { | ||
|
||
private static final Logger LOG = LogManager.getLogger(QueryDataAnonymizer.class); | ||
|
||
/** | ||
* This method is used to anonymize sensitive data in SQL query. | ||
* Sensitive data includes index names, column names etc., | ||
* which in druid parser are parsed to SQLIdentifierExpr instances | ||
* @param query entire sql query string | ||
* @return sql query string with all identifiers replaced with "***" | ||
*/ | ||
public static String anonymizeData(String query) { | ||
String resultQuery; | ||
try { | ||
AnonymizeSensitiveDataRule rule = new AnonymizeSensitiveDataRule(); | ||
SQLQueryExpr sqlExpr = (SQLQueryExpr) toSqlExpr(query); | ||
rule.rewrite(sqlExpr); | ||
resultQuery = SQLUtils.toMySqlString(sqlExpr).replaceAll("0", "number") | ||
.replaceAll("false", "boolean_literal") | ||
.replaceAll("[\\n][\\t]+", " "); | ||
} catch (Exception e) { | ||
LOG.error("Caught an exception when removing sensitive data", e); | ||
resultQuery = query; | ||
} | ||
return resultQuery; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
98 changes: 98 additions & 0 deletions
98
...ava/com/amazon/opendistroforelasticsearch/sql/unittest/utils/QueryDataAnonymizerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* A copy of the License is located at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* or in the "license" file accompanying this file. This file is distributed | ||
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
* express or implied. See the License for the specific language governing | ||
* permissions and limitations under the License. | ||
*/ | ||
|
||
package com.amazon.opendistroforelasticsearch.sql.unittest.utils; | ||
|
||
import com.amazon.opendistroforelasticsearch.sql.utils.QueryDataAnonymizer; | ||
import org.junit.Assert; | ||
import org.junit.Test; | ||
|
||
public class QueryDataAnonymizerTest { | ||
|
||
@Test | ||
public void queriesShouldHaveAnonymousFieldAndIndex() { | ||
String query = "SELECT ABS(balance) FROM accounts WHERE age > 30 GROUP BY ABS(balance)"; | ||
String expectedQuery = "( SELECT ABS(identifier) FROM table WHERE identifier > number GROUP BY ABS(identifier) )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesShouldAnonymousNumbers() { | ||
String query = "SELECT ABS(20), LOG(20.20) FROM accounts"; | ||
String expectedQuery = "( SELECT ABS(number), LOG(number) FROM table )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesShouldHaveAnonymousBooleanLiterals() { | ||
String query = "SELECT TRUE FROM accounts"; | ||
String expectedQuery = "( SELECT boolean_literal FROM table )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesShouldHaveAnonymousInputStrings() { | ||
String query = "SELECT * FROM accounts WHERE name = 'Oliver'"; | ||
String expectedQuery = "( SELECT * FROM table WHERE identifier = 'string_literal' )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesWithAliasesShouldAnonymizeSensitiveData() { | ||
String query = "SELECT balance AS b FROM accounts AS a"; | ||
String expectedQuery = "( SELECT identifier AS b FROM table a )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesWithFunctionsShouldAnonymizeSensitiveData() { | ||
String query = "SELECT LTRIM(firstname) FROM accounts"; | ||
String expectedQuery = "( SELECT LTRIM(identifier) FROM table )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesWithAggregatesShouldAnonymizeSensitiveData() { | ||
String query = "SELECT MAX(price) - MIN(price) from tickets"; | ||
String expectedQuery = "( SELECT MAX(identifier) - MIN(identifier) FROM table )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void queriesWithSubqueriesShouldAnonymizeSensitiveData() { | ||
String query = "SELECT a.f, a.l, a.a FROM " + | ||
"(SELECT firstname AS f, lastname AS l, age AS a FROM accounts WHERE age > 30) a"; | ||
String expectedQuery = "( SELECT identifier, identifier, identifier FROM (SELECT identifier AS f, " + | ||
"identifier AS l, identifier AS a FROM table WHERE identifier > number ) a )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void joinQueriesShouldAnonymizeSensitiveData() { | ||
String query = "SELECT a.account_number, a.firstname, a.lastname, e.id, e.name " + | ||
"FROM accounts a JOIN employees e"; | ||
String expectedQuery = "( SELECT identifier, identifier, identifier, identifier, identifier " + | ||
"FROM table a JOIN table e )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
|
||
@Test | ||
public void unionQueriesShouldAnonymizeSensitiveData() { | ||
String query = "SELECT name, age FROM accounts UNION SELECT name, age FROM employees"; | ||
String expectedQuery = "( SELECT identifier, identifier FROM table " + | ||
"UNION SELECT identifier, identifier FROM table )"; | ||
Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query)); | ||
} | ||
} |