Skip to content

Commit

Permalink
Anonymize sensitive data in queries exposed to RestSqlAction logs (op…
Browse files Browse the repository at this point in the history
…endistro-for-elasticsearch#419)

* remove sensitive data from queries for logging

* Added rule to mask sensitive data from es logs

* Applied API in SQLUtils to rebuild query string from AST; replace data masks with anonymous words

* Inlined log message; added doc for new rule
  • Loading branch information
chloe-zh authored Apr 10, 2020
1 parent 5e5f485 commit 45fa29c
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import com.amazon.opendistroforelasticsearch.sql.rewriter.matchtoterm.VerificationException;
import com.amazon.opendistroforelasticsearch.sql.utils.JsonPrettyFormatter;
import com.amazon.opendistroforelasticsearch.sql.utils.LogUtils;
import com.amazon.opendistroforelasticsearch.sql.utils.QueryDataAnonymizer;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.client.Client;
Expand Down Expand Up @@ -112,7 +113,8 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
}

final SqlRequest sqlRequest = SqlRequestFactory.getSqlRequest(request);
LOG.info("[{}] Incoming request {}: {}", LogUtils.getRequestId(), request.uri(), sqlRequest.getSql());
LOG.info("[{}] Incoming request {}: {}", LogUtils.getRequestId(), request.uri(),
QueryDataAnonymizer.anonymizeData(sqlRequest.getSql()));

final QueryAction queryAction =
explainRequest(client, sqlRequest, SqlRequestParam.getFormat(request.params()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

package com.amazon.opendistroforelasticsearch.sql.query;

import com.alibaba.druid.sql.ast.SQLExpr;
import com.alibaba.druid.sql.ast.expr.SQLAggregateExpr;
import com.alibaba.druid.sql.ast.expr.SQLAllColumnExpr;
import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr;
Expand All @@ -28,10 +27,7 @@
import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock;
import com.alibaba.druid.sql.dialect.mysql.parser.MySqlStatementParser;
import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlASTVisitorAdapter;
import com.alibaba.druid.sql.parser.ParserException;
import com.alibaba.druid.sql.parser.SQLExprParser;
import com.alibaba.druid.sql.parser.SQLStatementParser;
import com.alibaba.druid.sql.parser.Token;
import com.amazon.opendistroforelasticsearch.sql.domain.ColumnTypeProvider;
import com.amazon.opendistroforelasticsearch.sql.domain.Delete;
import com.amazon.opendistroforelasticsearch.sql.domain.IndexStatement;
Expand All @@ -46,7 +42,6 @@
import com.amazon.opendistroforelasticsearch.sql.executor.adapter.QueryPlanQueryAction;
import com.amazon.opendistroforelasticsearch.sql.executor.adapter.QueryPlanRequestBuilder;
import com.amazon.opendistroforelasticsearch.sql.parser.ElasticLexer;
import com.amazon.opendistroforelasticsearch.sql.parser.ElasticSqlExprParser;
import com.amazon.opendistroforelasticsearch.sql.parser.SqlParser;
import com.amazon.opendistroforelasticsearch.sql.parser.SubQueryExpression;
import com.amazon.opendistroforelasticsearch.sql.query.join.ESJoinQueryActionFactory;
Expand All @@ -73,6 +68,7 @@
import java.util.List;

import static com.amazon.opendistroforelasticsearch.sql.domain.IndexStatement.StatementType;
import static com.amazon.opendistroforelasticsearch.sql.utils.Util.toSqlExpr;

public class ESActionFactory {

Expand All @@ -85,7 +81,7 @@ public static QueryAction create(Client client, String sql)
* Create the compatible Query object
* based on the SQL query.
*
* @param sql The SQL query.
* @param request The SQL query.
* @return Query object.
*/
public static QueryAction create(Client client, QueryActionRequest request)
Expand Down Expand Up @@ -197,16 +193,6 @@ private static boolean isJoin(SQLQueryExpr sqlExpr, String sql) {
&& ((SQLJoinTableSource) query.getFrom()).getJoinType() != SQLJoinTableSource.JoinType.COMMA;
}

/**
 * Parses a SQL string into an expression tree using {@link ElasticSqlExprParser}.
 *
 * @param sql the SQL text to parse
 * @return the parsed expression
 * @throws ParserException if the lexer has not reached end-of-input after parsing
 */
private static SQLExpr toSqlExpr(String sql) {
    final SQLExprParser exprParser = new ElasticSqlExprParser(sql);
    final SQLExpr parsed = exprParser.expr();

    // Anything left after the expression means the input was not one complete expression.
    final boolean fullyConsumed = exprParser.getLexer().token() == Token.EOF;
    if (fullyConsumed) {
        return parsed;
    }
    throw new ParserException("Illegal SQL expression : " + sql);
}

@VisibleForTesting
public static boolean shouldMigrateToQueryPlan(SQLQueryExpr expr, Format format) {
// The JSON format will return the Elasticsearch aggregation result, which is not supported by the QueryPlanner.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.rewriter.identifier;

import com.alibaba.druid.sql.ast.expr.SQLBooleanExpr;
import com.alibaba.druid.sql.ast.expr.SQLCharExpr;
import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr;
import com.alibaba.druid.sql.ast.expr.SQLIntegerExpr;
import com.alibaba.druid.sql.ast.expr.SQLNumberExpr;
import com.alibaba.druid.sql.ast.expr.SQLQueryExpr;
import com.alibaba.druid.sql.ast.statement.SQLExprTableSource;
import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlASTVisitorAdapter;
import com.amazon.opendistroforelasticsearch.sql.rewriter.RewriteRule;

/**
 * Rewrite rule that anonymizes sensitive data in queries destined for logs.
 * It walks the AST and overwrites the content of nodes that might carry index data:
 * identifiers, integer and number literals, string literals and boolean literals.
 * The AST passed to {@link #rewrite(SQLQueryExpr)} is modified in place.
 */
public class AnonymizeSensitiveDataRule extends MySqlASTVisitorAdapter
        implements RewriteRule<SQLQueryExpr> {

    /**
     * This rule has no precondition.
     *
     * @param expr the query expression under consideration
     * @return always {@code true}
     */
    @Override
    public boolean match(SQLQueryExpr expr) {
        return true;
    }

    /**
     * Applies this visitor to the given query expression.
     *
     * @param expr the query expression to anonymize in place
     */
    @Override
    public void rewrite(SQLQueryExpr expr) {
        expr.accept(this);
    }

    /**
     * Renames an identifier to "table" when its parent is a table source,
     * and to "identifier" otherwise.
     *
     * @return always {@code true}
     */
    @Override
    public boolean visit(SQLIdentifierExpr identifierExpr) {
        final boolean namesTable = identifierExpr.getParent() instanceof SQLExprTableSource;
        identifierExpr.setName(namesTable ? "table" : "identifier");
        return true;
    }

    /**
     * Overwrites an integer literal with 0.
     *
     * @return always {@code true}
     */
    @Override
    public boolean visit(SQLIntegerExpr integerExpr) {
        integerExpr.setNumber(0);
        return true;
    }

    /**
     * Overwrites a number literal with 0.
     *
     * @return always {@code true}
     */
    @Override
    public boolean visit(SQLNumberExpr numberExpr) {
        numberExpr.setNumber(0);
        return true;
    }

    /**
     * Overwrites the text of a string literal with "string_literal".
     *
     * @return always {@code true}
     */
    @Override
    public boolean visit(SQLCharExpr charExpr) {
        charExpr.setText("string_literal");
        return true;
    }

    /**
     * Overwrites a boolean literal with {@code false}.
     *
     * @return always {@code true}
     */
    @Override
    public boolean visit(SQLBooleanExpr booleanExpr) {
        booleanExpr.setValue(false);
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.utils;

import com.alibaba.druid.sql.SQLUtils;
import com.alibaba.druid.sql.ast.expr.SQLQueryExpr;
import com.amazon.opendistroforelasticsearch.sql.rewriter.identifier.AnonymizeSensitiveDataRule;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import static com.amazon.opendistroforelasticsearch.sql.utils.Util.toSqlExpr;

/**
 * Utility class to mask sensitive information in incoming SQL queries before they are logged.
 */
public class QueryDataAnonymizer {

    private static final Logger LOG = LogManager.getLogger(QueryDataAnonymizer.class);

    /** Returned instead of the query text when anonymization fails, so raw data never reaches the log. */
    private static final String ANONYMIZATION_FAILED = "Failed to anonymize data.";

    /**
     * Anonymizes sensitive data in a SQL query so that the result is safe to log.
     * The query is parsed, rewritten by {@link AnonymizeSensitiveDataRule}, which overwrites
     * identifiers, numeric literals, string literals and boolean literals, and then rebuilt
     * into a single-line string. The standalone tokens {@code 0} and {@code false} left by
     * the rule are rendered as {@code number} and {@code boolean_literal}.
     *
     * @param query entire sql query string
     * @return the anonymized query string, or a fixed failure message if the query could not be
     *         parsed or rewritten; the original query text is never returned on failure
     */
    public static String anonymizeData(String query) {
        try {
            SQLQueryExpr sqlExpr = (SQLQueryExpr) toSqlExpr(query);
            new AnonymizeSensitiveDataRule().rewrite(sqlExpr);
            // Word boundaries keep the masks from rewriting digits or "false" embedded in other
            // tokens, e.g. the function name LOG10 or an alias such as t0.
            return SQLUtils.toMySqlString(sqlExpr)
                    .replaceAll("\\b0\\b", "number")
                    .replaceAll("\\bfalse\\b", "boolean_literal")
                    .replaceAll("[\\n][\\t]+", " ");
        } catch (Exception e) {
            // Neither the query nor the exception message is logged: the parser's error message
            // embeds the raw SQL, which would defeat the purpose of anonymization.
            LOG.warn("Caught an exception ({}) when anonymizing sensitive data", e.getClass().getName());
            return ANONYMIZATION_FAILED;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,13 @@
import com.alibaba.druid.sql.ast.statement.SQLSelectQueryBlock;
import com.alibaba.druid.sql.ast.statement.SQLTableSource;
import com.alibaba.druid.sql.ast.statement.SQLUnionQuery;
import com.alibaba.druid.sql.parser.ParserException;
import com.alibaba.druid.sql.parser.SQLExprParser;
import com.alibaba.druid.sql.parser.Token;
import com.amazon.opendistroforelasticsearch.sql.domain.IndexStatement;
import com.amazon.opendistroforelasticsearch.sql.domain.KVValue;
import com.amazon.opendistroforelasticsearch.sql.exception.SqlParseException;
import com.amazon.opendistroforelasticsearch.sql.parser.ElasticSqlExprParser;
import org.elasticsearch.action.admin.indices.get.GetIndexRequest;
import org.elasticsearch.action.admin.indices.get.GetIndexRequestBuilder;
import org.elasticsearch.client.Client;
Expand Down Expand Up @@ -261,4 +265,14 @@ public static GetIndexRequestBuilder prepareIndexRequestBuilder(Client client, I
return indexRequestBuilder;
}

/**
 * Parses a SQL string into an expression tree using {@link ElasticSqlExprParser}.
 *
 * @param sql the SQL text to parse
 * @return the parsed expression
 * @throws ParserException if the lexer has not reached end-of-input after parsing
 */
public static SQLExpr toSqlExpr(String sql) {
    final SQLExprParser sqlParser = new ElasticSqlExprParser(sql);
    final SQLExpr result = sqlParser.expr();

    // Leftover tokens mean the input was not a single well-formed expression.
    final Token lastToken = sqlParser.getLexer().token();
    if (lastToken == Token.EOF) {
        return result;
    }
    throw new ParserException("Illegal SQL expression : " + sql);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.sql.unittest.utils;

import com.amazon.opendistroforelasticsearch.sql.utils.QueryDataAnonymizer;
import org.junit.Assert;
import org.junit.Test;

/**
 * Unit tests for {@link QueryDataAnonymizer#anonymizeData(String)}: each case feeds one query
 * and compares the anonymized output against the exact expected string.
 */
public class QueryDataAnonymizerTest {

    @Test
    public void queriesShouldHaveAnonymousFieldAndIndex() {
        assertAnonymized(
            "( SELECT ABS(identifier) FROM table WHERE identifier > number GROUP BY ABS(identifier) )",
            "SELECT ABS(balance) FROM accounts WHERE age > 30 GROUP BY ABS(balance)");
    }

    @Test
    public void queriesShouldAnonymousNumbers() {
        assertAnonymized(
            "( SELECT ABS(number), LOG(number) FROM table )",
            "SELECT ABS(20), LOG(20.20) FROM accounts");
    }

    @Test
    public void queriesShouldHaveAnonymousBooleanLiterals() {
        assertAnonymized(
            "( SELECT boolean_literal FROM table )",
            "SELECT TRUE FROM accounts");
    }

    @Test
    public void queriesShouldHaveAnonymousInputStrings() {
        assertAnonymized(
            "( SELECT * FROM table WHERE identifier = 'string_literal' )",
            "SELECT * FROM accounts WHERE name = 'Oliver'");
    }

    @Test
    public void queriesWithAliasesShouldAnonymizeSensitiveData() {
        assertAnonymized(
            "( SELECT identifier AS b FROM table a )",
            "SELECT balance AS b FROM accounts AS a");
    }

    @Test
    public void queriesWithFunctionsShouldAnonymizeSensitiveData() {
        assertAnonymized(
            "( SELECT LTRIM(identifier) FROM table )",
            "SELECT LTRIM(firstname) FROM accounts");
    }

    @Test
    public void queriesWithAggregatesShouldAnonymizeSensitiveData() {
        assertAnonymized(
            "( SELECT MAX(identifier) - MIN(identifier) FROM table )",
            "SELECT MAX(price) - MIN(price) from tickets");
    }

    @Test
    public void queriesWithSubqueriesShouldAnonymizeSensitiveData() {
        assertAnonymized(
            "( SELECT identifier, identifier, identifier FROM (SELECT identifier AS f, "
                + "identifier AS l, identifier AS a FROM table WHERE identifier > number ) a )",
            "SELECT a.f, a.l, a.a FROM "
                + "(SELECT firstname AS f, lastname AS l, age AS a FROM accounts WHERE age > 30) a");
    }

    @Test
    public void joinQueriesShouldAnonymizeSensitiveData() {
        assertAnonymized(
            "( SELECT identifier, identifier, identifier, identifier, identifier "
                + "FROM table a JOIN table e )",
            "SELECT a.account_number, a.firstname, a.lastname, e.id, e.name "
                + "FROM accounts a JOIN employees e");
    }

    @Test
    public void unionQueriesShouldAnonymizeSensitiveData() {
        assertAnonymized(
            "( SELECT identifier, identifier FROM table "
                + "UNION SELECT identifier, identifier FROM table )",
            "SELECT name, age FROM accounts UNION SELECT name, age FROM employees");
    }

    /**
     * Asserts that anonymizing {@code query} yields exactly {@code expectedQuery}.
     *
     * @param expectedQuery the anonymized form the query should be rewritten to
     * @param query         the raw SQL query handed to the anonymizer
     */
    private static void assertAnonymized(String expectedQuery, String query) {
        Assert.assertEquals(expectedQuery, QueryDataAnonymizer.anonymizeData(query));
    }
}

0 comments on commit 45fa29c

Please sign in to comment.