From ac6bac41cde1e46456144dddc1491ec3d48fd480 Mon Sep 17 00:00:00 2001 From: Chloe Date: Thu, 9 Apr 2020 20:13:47 -0700 Subject: [PATCH] Anonymize sensitive data in queries exposed to RestSqlAction logs (#419) * remove sensitive data from queries for logging * Added rule to mask sensitive data from es logs * Applied API in SQLUtils to rebuild query string from AST; replace data masks with anonymous words * Inlined log message; added doc for new rule --- .../sql/plugin/RestSqlAction.java | 4 +- .../sql/query/ESActionFactory.java | 18 +--- .../AnonymizeSensitiveDataRule.java | 78 +++++++++++++++ .../sql/utils/QueryDataAnonymizer.java | 55 +++++++++++ .../sql/utils/Util.java | 14 +++ .../utils/QueryDataAnonymizerTest.java | 98 +++++++++++++++++++ 6 files changed, 250 insertions(+), 17 deletions(-) create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/sql/rewriter/identifier/AnonymizeSensitiveDataRule.java create mode 100644 src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/QueryDataAnonymizer.java create mode 100644 src/test/java/com/amazon/opendistroforelasticsearch/sql/unittest/utils/QueryDataAnonymizerTest.java diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/plugin/RestSqlAction.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/plugin/RestSqlAction.java index 1dbfd9deac..14207a906f 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/sql/plugin/RestSqlAction.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/plugin/RestSqlAction.java @@ -38,6 +38,7 @@ import com.amazon.opendistroforelasticsearch.sql.rewriter.matchtoterm.VerificationException; import com.amazon.opendistroforelasticsearch.sql.utils.JsonPrettyFormatter; import com.amazon.opendistroforelasticsearch.sql.utils.LogUtils; +import com.amazon.opendistroforelasticsearch.sql.utils.QueryDataAnonymizer; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import 
org.elasticsearch.client.Client; @@ -112,7 +113,8 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } final SqlRequest sqlRequest = SqlRequestFactory.getSqlRequest(request); - LOG.info("[{}] Incoming request {}: {}", LogUtils.getRequestId(), request.uri(), sqlRequest.getSql()); + LOG.info("[{}] Incoming request {}: {}", LogUtils.getRequestId(), request.uri(), + QueryDataAnonymizer.anonymizeData(sqlRequest.getSql())); final QueryAction queryAction = explainRequest(client, sqlRequest, SqlRequestParam.getFormat(request.params())); diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/query/ESActionFactory.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/query/ESActionFactory.java index 0bff89a659..0a8549c31f 100644 --- a/src/main/java/com/amazon/opendistroforelasticsearch/sql/query/ESActionFactory.java +++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/query/ESActionFactory.java @@ -15,7 +15,6 @@ package com.amazon.opendistroforelasticsearch.sql.query; -import com.alibaba.druid.sql.ast.SQLExpr; import com.alibaba.druid.sql.ast.expr.SQLAggregateExpr; import com.alibaba.druid.sql.ast.expr.SQLAllColumnExpr; import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr; @@ -28,10 +27,7 @@ import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock; import com.alibaba.druid.sql.dialect.mysql.parser.MySqlStatementParser; import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlASTVisitorAdapter; -import com.alibaba.druid.sql.parser.ParserException; -import com.alibaba.druid.sql.parser.SQLExprParser; import com.alibaba.druid.sql.parser.SQLStatementParser; -import com.alibaba.druid.sql.parser.Token; import com.amazon.opendistroforelasticsearch.sql.domain.ColumnTypeProvider; import com.amazon.opendistroforelasticsearch.sql.domain.Delete; import com.amazon.opendistroforelasticsearch.sql.domain.IndexStatement; @@ -46,7 +42,6 @@ import 
com.amazon.opendistroforelasticsearch.sql.executor.adapter.QueryPlanQueryAction; import com.amazon.opendistroforelasticsearch.sql.executor.adapter.QueryPlanRequestBuilder; import com.amazon.opendistroforelasticsearch.sql.parser.ElasticLexer; -import com.amazon.opendistroforelasticsearch.sql.parser.ElasticSqlExprParser; import com.amazon.opendistroforelasticsearch.sql.parser.SqlParser; import com.amazon.opendistroforelasticsearch.sql.parser.SubQueryExpression; import com.amazon.opendistroforelasticsearch.sql.query.join.ESJoinQueryActionFactory; @@ -73,6 +68,7 @@ import java.util.List; import static com.amazon.opendistroforelasticsearch.sql.domain.IndexStatement.StatementType; +import static com.amazon.opendistroforelasticsearch.sql.utils.Util.toSqlExpr; public class ESActionFactory { @@ -85,7 +81,7 @@ public static QueryAction create(Client client, String sql) * Create the compatible Query object * based on the SQL query. * - * @param sql The SQL query. + * @param request The SQL query. * @return Query object. */ public static QueryAction create(Client client, QueryActionRequest request) @@ -197,16 +193,6 @@ private static boolean isJoin(SQLQueryExpr sqlExpr, String sql) { && ((SQLJoinTableSource) query.getFrom()).getJoinType() != SQLJoinTableSource.JoinType.COMMA; } - private static SQLExpr toSqlExpr(String sql) { - SQLExprParser parser = new ElasticSqlExprParser(sql); - SQLExpr expr = parser.expr(); - - if (parser.getLexer().token() != Token.EOF) { - throw new ParserException("Illegal SQL expression : " + sql); - } - return expr; - } - @VisibleForTesting public static boolean shouldMigrateToQueryPlan(SQLQueryExpr expr, Format format) { // The JSON format will return the Elasticsearch aggregation result, which is not supported by the QueryPlanner. 
diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/rewriter/identifier/AnonymizeSensitiveDataRule.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/rewriter/identifier/AnonymizeSensitiveDataRule.java
new file mode 100644
index 0000000000..8b1d008777
--- /dev/null
+++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/rewriter/identifier/AnonymizeSensitiveDataRule.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistroforelasticsearch.sql.rewriter.identifier;
+
+import com.alibaba.druid.sql.ast.expr.SQLBooleanExpr;
+import com.alibaba.druid.sql.ast.expr.SQLCharExpr;
+import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr;
+import com.alibaba.druid.sql.ast.expr.SQLIntegerExpr;
+import com.alibaba.druid.sql.ast.expr.SQLNumberExpr;
+import com.alibaba.druid.sql.ast.expr.SQLQueryExpr;
+import com.alibaba.druid.sql.ast.statement.SQLExprTableSource;
+import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlASTVisitorAdapter;
+import com.amazon.opendistroforelasticsearch.sql.rewriter.RewriteRule;
+
+/**
+ * Rewrite rule that anonymizes sensitive data in queries before they are logged.
+ * Visiting the AST, it overwrites in place the nodes that might carry index data
+ * (identifiers and number, string and boolean literals) with fixed anonymous content.
+ */
+public class AnonymizeSensitiveDataRule extends MySqlASTVisitorAdapter implements RewriteRule {
+
+    /** Name given to an identifier whose parent node is a table source. */
+    private static final String ANONYMOUS_TABLE = "table";
+
+    /** Name given to every other identifier. */
+    private static final String ANONYMOUS_IDENTIFIER = "identifier";
+
+    /** Text given to every string literal. */
+    private static final String ANONYMOUS_STRING = "string_literal";
+
+    /**
+     * Reports that this rule applies to every query.
+     *
+     * @param expr query being considered; not inspected
+     * @return always {@code true}
+     */
+    @Override
+    public boolean match(SQLQueryExpr expr) {
+        return true;
+    }
+
+    /**
+     * Anonymizes the query in place by having it accept this visitor.
+     *
+     * @param expr root of the query AST to anonymize
+     */
+    @Override
+    public void rewrite(SQLQueryExpr expr) {
+        expr.accept(this);
+    }
+
+    /**
+     * Renames the identifier to {@code "table"} when its parent is a table source
+     * and to {@code "identifier"} otherwise.
+     */
+    @Override
+    public boolean visit(SQLIdentifierExpr identifierExpr) {
+        boolean namesTable = identifierExpr.getParent() instanceof SQLExprTableSource;
+        identifierExpr.setName(namesTable ? ANONYMOUS_TABLE : ANONYMOUS_IDENTIFIER);
+        return true;
+    }
+
+    /** Resets an integer literal to {@code 0}. */
+    @Override
+    public boolean visit(SQLIntegerExpr integerExpr) {
+        integerExpr.setNumber(0);
+        return true;
+    }
+
+    /** Resets a numeric literal to {@code 0}. */
+    @Override
+    public boolean visit(SQLNumberExpr numberExpr) {
+        numberExpr.setNumber(0);
+        return true;
+    }
+
+    /** Replaces the text of a string literal with {@code "string_literal"}. */
+    @Override
+    public boolean visit(SQLCharExpr charExpr) {
+        charExpr.setText(ANONYMOUS_STRING);
+        return true;
+    }
+
+    /** Resets a boolean literal to {@code false}. */
+    @Override
+    public boolean visit(SQLBooleanExpr booleanExpr) {
+        booleanExpr.setValue(false);
+        return true;
+    }
+}
diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/QueryDataAnonymizer.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/QueryDataAnonymizer.java
new file mode 100644
index 0000000000..efba0c88d1
--- /dev/null
+++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/QueryDataAnonymizer.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistroforelasticsearch.sql.utils;
+
+import com.alibaba.druid.sql.SQLUtils;
+import com.alibaba.druid.sql.ast.expr.SQLQueryExpr;
+import com.amazon.opendistroforelasticsearch.sql.rewriter.identifier.AnonymizeSensitiveDataRule;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.regex.Pattern;
+
+import static com.amazon.opendistroforelasticsearch.sql.utils.Util.toSqlExpr;
+
+/**
+ * Utility class to mask sensitive information in incoming SQL queries.
+ */
+public class QueryDataAnonymizer {
+
+    private static final Logger LOG = LogManager.getLogger(QueryDataAnonymizer.class);
+
+    /** Text returned in place of a query that could not be anonymized. */
+    private static final String FAILURE_PLACEHOLDER = "Failed to anonymize data.";
+
+    /** A stand-alone {@code 0}, which is what the rule leaves behind for numeric literals. */
+    private static final Pattern NUMBER_SENTINEL = Pattern.compile("\\b0\\b");
+
+    /** A stand-alone {@code false}, which is what the rule leaves behind for boolean literals. */
+    private static final Pattern BOOLEAN_SENTINEL = Pattern.compile("\\bfalse\\b");
+
+    /** A line break followed by tab indentation in the printed query. */
+    private static final Pattern LINE_BREAK_AND_INDENT = Pattern.compile("[\\n][\\t]+");
+
+    /**
+     * Anonymizes sensitive data in a SQL query so that the result can be written to logs.
+     * The query is parsed, rewritten by {@link AnonymizeSensitiveDataRule}, printed back to SQL,
+     * and the numeric and boolean sentinels left by the rule ({@code 0} and {@code false}) are
+     * replaced with the words {@code number} and {@code boolean_literal}.
+     *
+     * <p>Only stand-alone sentinel tokens are replaced, so text the rule does not rewrite, such as
+     * an alias {@code a0} or a function name {@code LOG10}, is not corrupted.
+     *
+     * <p>If the query cannot be parsed or rewritten, a fixed placeholder is returned instead of
+     * the input, because the caller logs whatever this method returns.
+     *
+     * @param query entire sql query string
+     * @return the anonymized query string, or {@code "Failed to anonymize data."} on failure
+     */
+    public static String anonymizeData(String query) {
+        try {
+            SQLQueryExpr sqlExpr = (SQLQueryExpr) toSqlExpr(query);
+            new AnonymizeSensitiveDataRule().rewrite(sqlExpr);
+
+            String rendered = SQLUtils.toMySqlString(sqlExpr);
+            rendered = NUMBER_SENTINEL.matcher(rendered).replaceAll("number");
+            rendered = BOOLEAN_SENTINEL.matcher(rendered).replaceAll("boolean_literal");
+            return LINE_BREAK_AND_INDENT.matcher(rendered).replaceAll(" ");
+        } catch (Exception e) {
+            // Neither the raw query nor the exception is logged: the parser's error message
+            // can embed the query text, which is exactly what must stay out of the logs.
+            LOG.warn("Caught an exception when anonymizing sensitive data: {}", e.getClass().getName());
+            return FAILURE_PLACEHOLDER;
+        }
+    }
+}
diff --git a/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/Util.java b/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/Util.java
index c24dbc8bee..048b9130dc 100644
--- a/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/Util.java
+++ b/src/main/java/com/amazon/opendistroforelasticsearch/sql/utils/Util.java
@@ -31,9 +31,13 @@
 import com.alibaba.druid.sql.ast.statement.SQLSelectQueryBlock;
 import com.alibaba.druid.sql.ast.statement.SQLTableSource;
 import
com.alibaba.druid.sql.ast.statement.SQLUnionQuery;
+import com.alibaba.druid.sql.parser.ParserException;
+import com.alibaba.druid.sql.parser.SQLExprParser;
+import com.alibaba.druid.sql.parser.Token;
 import com.amazon.opendistroforelasticsearch.sql.domain.IndexStatement;
 import com.amazon.opendistroforelasticsearch.sql.domain.KVValue;
 import com.amazon.opendistroforelasticsearch.sql.exception.SqlParseException;
+import com.amazon.opendistroforelasticsearch.sql.parser.ElasticSqlExprParser;
 import org.elasticsearch.action.admin.indices.get.GetIndexRequest;
 import org.elasticsearch.action.admin.indices.get.GetIndexRequestBuilder;
 import org.elasticsearch.client.Client;
@@ -261,4 +265,23 @@ public static GetIndexRequestBuilder prepareIndexRequestBuilder(Client client, I
         return indexRequestBuilder;
     }
 
+    /**
+     * Parses a SQL string into an expression tree using {@link ElasticSqlExprParser}.
+     *
+     * @param sql SQL text to parse
+     * @return the parsed expression
+     * @throws ParserException if the lexer is not at end of input once the expression is parsed
+     */
+    public static SQLExpr toSqlExpr(String sql) {
+        SQLExprParser exprParser = new ElasticSqlExprParser(sql);
+        SQLExpr parsed = exprParser.expr();
+
+        // Any token other than EOF here means part of the input was left unparsed.
+        boolean reachedEnd = exprParser.getLexer().token() == Token.EOF;
+        if (!reachedEnd) {
+            throw new ParserException("Illegal SQL expression : " + sql);
+        }
+        return parsed;
+    }
+
 }
diff --git a/src/test/java/com/amazon/opendistroforelasticsearch/sql/unittest/utils/QueryDataAnonymizerTest.java b/src/test/java/com/amazon/opendistroforelasticsearch/sql/unittest/utils/QueryDataAnonymizerTest.java
new file mode 100644
index 0000000000..49c2134497
--- /dev/null
+++ b/src/test/java/com/amazon/opendistroforelasticsearch/sql/unittest/utils/QueryDataAnonymizerTest.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.opendistroforelasticsearch.sql.unittest.utils;
+
+import com.amazon.opendistroforelasticsearch.sql.utils.QueryDataAnonymizer;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link QueryDataAnonymizer#anonymizeData(String)}. Each case passes one query
+ * and compares the returned text with the exact expected anonymized string.
+ */
+public class QueryDataAnonymizerTest {
+
+    @Test
+    public void queriesShouldHaveAnonymousFieldAndIndex() {
+        assertAnonymized(
+            "SELECT ABS(balance) FROM accounts WHERE age > 30 GROUP BY ABS(balance)",
+            "( SELECT ABS(identifier) FROM table WHERE identifier > number GROUP BY ABS(identifier) )");
+    }
+
+    @Test
+    public void queriesShouldAnonymousNumbers() {
+        assertAnonymized(
+            "SELECT ABS(20), LOG(20.20) FROM accounts",
+            "( SELECT ABS(number), LOG(number) FROM table )");
+    }
+
+    @Test
+    public void queriesShouldHaveAnonymousBooleanLiterals() {
+        assertAnonymized(
+            "SELECT TRUE FROM accounts",
+            "( SELECT boolean_literal FROM table )");
+    }
+
+    @Test
+    public void queriesShouldHaveAnonymousInputStrings() {
+        assertAnonymized(
+            "SELECT * FROM accounts WHERE name = 'Oliver'",
+            "( SELECT * FROM table WHERE identifier = 'string_literal' )");
+    }
+
+    @Test
+    public void queriesWithAliasesShouldAnonymizeSensitiveData() {
+        assertAnonymized(
+            "SELECT balance AS b FROM accounts AS a",
+            "( SELECT identifier AS b FROM table a )");
+    }
+
+    @Test
+    public void queriesWithFunctionsShouldAnonymizeSensitiveData() {
+        assertAnonymized(
+            "SELECT LTRIM(firstname) FROM accounts",
+            "( SELECT LTRIM(identifier) FROM table )");
+    }
+
+    @Test
+    public void queriesWithAggregatesShouldAnonymizeSensitiveData() {
+        assertAnonymized(
+            "SELECT MAX(price) - MIN(price) from tickets",
+            "( SELECT MAX(identifier) - MIN(identifier) FROM table )");
+    }
+
+    @Test
+    public void queriesWithSubqueriesShouldAnonymizeSensitiveData() {
+        assertAnonymized(
+            "SELECT a.f, a.l, a.a FROM "
+                + "(SELECT firstname AS f, lastname AS l, age AS a FROM accounts WHERE age > 30) a",
+            "( SELECT identifier, identifier, identifier FROM (SELECT identifier AS f, "
+                + "identifier AS l, identifier AS a FROM table WHERE identifier > number ) a )");
+    }
+
+    @Test
+    public void joinQueriesShouldAnonymizeSensitiveData() {
+        assertAnonymized(
+            "SELECT a.account_number, a.firstname, a.lastname, e.id, e.name "
+                + "FROM accounts a JOIN employees e",
+            "( SELECT identifier, identifier, identifier, identifier, identifier "
+                + "FROM table a JOIN table e )");
+    }
+
+    @Test
+    public void unionQueriesShouldAnonymizeSensitiveData() {
+        assertAnonymized(
+            "SELECT name, age FROM accounts UNION SELECT name, age FROM employees",
+            "( SELECT identifier, identifier FROM table "
+                + "UNION SELECT identifier, identifier FROM table )");
+    }
+
+    /**
+     * Anonymizes {@code query} and asserts that the result equals {@code expectedQuery}.
+     *
+     * @param query         SQL text handed to the anonymizer
+     * @param expectedQuery exact text the anonymizer is expected to return
+     */
+    private static void assertAnonymized(String query, String expectedQuery) {
+        String anonymized = QueryDataAnonymizer.anonymizeData(query);
+        Assert.assertEquals(expectedQuery, anonymized);
+    }
+}