Skip to content

Commit

Permalink
mongo: SERVER-71292 Redact all sensitive information in query #8731
Browse files Browse the repository at this point in the history
Commit: 7be344b92d3af3fd8eb4542e594e084bed394fdc
  • Loading branch information
Jess Balint authored and sourcegraph-bot committed Nov 15, 2022
1 parent 1cd22a6 commit c201a7b
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 24 deletions.
41 changes: 28 additions & 13 deletions mongo/src/mongo/bson/bsonobj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,41 +145,56 @@ BSONObj BSONObj::getOwned(const BSONObj& obj) {
return obj.getOwned();
}

BSONObj BSONObj::redact(bool onlyEncryptedFields) const {
BSONObj BSONObj::redact(bool onlyEncryptedFields,
std::function<std::string(const BSONElement&)> fieldNameRedactor) const {
_validateUnownedSize(objsize());

// Helper to get an "internal function" to be able to do recursion
struct redactor {
void appendRedactedElem(BSONObjBuilder& builder, const BSONElement& e, bool appendMask) {
void appendRedactedElem(BSONObjBuilder& builder,
const StringData& fieldNameString,
bool appendMask) {
if (appendMask) {
builder.append(e.fieldNameStringData(), "###"_sd);
builder.append(fieldNameString, "###"_sd);
} else {
builder.appendNull(e.fieldNameStringData());
builder.appendNull(fieldNameString);
}
}

void operator()(BSONObjBuilder& builder,
const BSONObj& obj,
bool appendMask,
bool onlyEncryptedFields) {
bool onlyEncryptedFields,
std::function<std::string(const BSONElement&)> fieldNameRedactor) {
for (BSONElement e : obj) {
StringData fieldNameString;
// Temporarily allocated string that must live long enough to be copied by builder.
std::string tempString;
if (!fieldNameRedactor) {
fieldNameString = e.fieldNameStringData();
} else {
tempString = fieldNameRedactor(e);
fieldNameString = {tempString};
}
if (e.type() == Object) {
BSONObjBuilder subBuilder = builder.subobjStart(e.fieldNameStringData());
operator()(subBuilder, e.Obj(), appendMask, onlyEncryptedFields);
BSONObjBuilder subBuilder = builder.subobjStart(fieldNameString);
operator()(
subBuilder, e.Obj(), appendMask, onlyEncryptedFields, fieldNameRedactor);
subBuilder.done();
} else if (e.type() == Array) {
BSONObjBuilder subBuilder = builder.subarrayStart(e.fieldNameStringData());
operator()(subBuilder, e.Obj(), appendMask, onlyEncryptedFields);
BSONObjBuilder subBuilder = builder.subarrayStart(fieldNameString);
operator()(
subBuilder, e.Obj(), appendMask, onlyEncryptedFields, fieldNameRedactor);
subBuilder.done();
} else {
if (onlyEncryptedFields) {
if (e.type() == BinData && e.binDataType() == BinDataType::Encrypt) {
appendRedactedElem(builder, e, appendMask);
appendRedactedElem(builder, fieldNameString, appendMask);
} else {
builder.append(e);
}
} else {
appendRedactedElem(builder, e, appendMask);
appendRedactedElem(builder, fieldNameString, appendMask);
}
}
}
Expand All @@ -188,7 +203,7 @@ BSONObj BSONObj::redact(bool onlyEncryptedFields) const {

try {
BSONObjBuilder builder;
redactor()(builder, *this, /*appendMask=*/true, onlyEncryptedFields);
redactor()(builder, *this, /*appendMask=*/true, onlyEncryptedFields, fieldNameRedactor);
return builder.obj();
} catch (const ExceptionFor<ErrorCodes::BSONObjectTooLarge>&) {
}
Expand All @@ -198,7 +213,7 @@ BSONObj BSONObj::redact(bool onlyEncryptedFields) const {
// we use BSONType::jstNull, which ensures the redacted object will not be larger than the
// original.
BSONObjBuilder builder;
redactor()(builder, *this, /*appendMask=*/false, onlyEncryptedFields);
redactor()(builder, *this, /*appendMask=*/false, onlyEncryptedFields, fieldNameRedactor);
return builder.obj();
}

Expand Down
4 changes: 3 additions & 1 deletion mongo/src/mongo/bson/bsonobj.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ class BSONObj {
/**
* @return a new full (and owned) redacted copy of the object.
*/
BSONObj redact(bool onlyEncryptedFields = false) const;
BSONObj redact(
bool onlyEncryptedFields = false,
std::function<std::string(const BSONElement&)> fieldNameRedactor = nullptr) const;

/**
* Readable representation of a BSON object in an extended JSON-style notation.
Expand Down
5 changes: 3 additions & 2 deletions mongo/src/mongo/db/pipeline/document_source_telemetry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ void DocumentSourceTelemetry::buildTelemetryStoreIterator() {
const auto partitionReadTime =
Timestamp{Timestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0)};
for (auto&& [key, metrics] : *partition) {
Document d{
{{"key", key}, {"metrics", metrics.toBSON()}, {"asOf", partitionReadTime}}};
Document d{{{"key", metrics.redactKey(key)},
{"metrics", metrics.toBSON()},
{"asOf", partitionReadTime}}};
_queue.push(std::move(d));
}
});
Expand Down
21 changes: 21 additions & 0 deletions mongo/src/mongo/db/query/query_knobs.idl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ enums:
# Force the Bonsai optimizer for all queries.
kForceBonsai: "forceBonsai"

QueryTelemetryFieldNameRedactionStrategy:
description: "Enum for possible values of queryFieldNameRedactionStrategy."
type: string
values:
kNoRedactionStrategy: "none"
# Use the constant redaction strategy.
kConstantRedactionStrategy: "constant"
# Use a prefix of sha256 redaction strategy
kSha256RedactionStrategy: "sha256"

server_parameters:

#
Expand Down Expand Up @@ -973,6 +983,17 @@ server_parameters:
validator:
callback: telemetry_util::validateTelemetryStoreSize

internalQueryConfigureTelemetryFieldNameRedactionStrategy:
description: Choose between the "none" implementation which will not redact any fields names, the "constant"
implementation, which will replace all field names with a constant, and the "sha256" implementation, which
preserves the identify of a field.
set_at: [ startup, runtime ]
cpp_class:
name: QueryTelemetryControl
data: synchronized_value<QueryTelemetryFieldNameRedactionStrategyEnum>
default:
expr: QueryTelemetryFieldNameRedactionStrategyEnum::kConstantRedactionStrategy


# Note for adding additional query knobs:
#
Expand Down
113 changes: 106 additions & 7 deletions mongo/src/mongo/db/query/telemetry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ bool shouldCollect(const ServiceContext* serviceCtx) {
if (!telemetryRateLimiter(serviceCtx)->handleRequestSlidingWindow()) {
return false;
}
// TODO SERVER-71244: check if it's a FLE collection here (maybe pass in the request)
return true;
}

Expand All @@ -195,21 +194,23 @@ void addToFindKey(BSONObjBuilder& builder, const StringData& fieldName, const BS
serializeBSONWhenNotEmpty(value.redact(false), fieldName, &builder);
}

// Call this function from inside the redact() function on every BSONElement in the BSONObj.
void throwIfEncounteringFLEPayload(BSONElement e) {
/**
* Recognize FLE payloads in a query and throw an exception if found.
*/
void throwIfEncounteringFLEPayload(const BSONElement& e) {
constexpr auto safeContentLabel = "__safeContent__"_sd;
constexpr auto fieldpath = "$__safeContent__"_sd;
if (e.type() == BSONType::Object) {
auto fieldname = e.fieldNameStringData();
uassert(ErrorCodes::EncounteredFLEPayloadWhileRedacting,
"Encountered __safeContent__, or an $_internalFle operator, which indicate a "
"rewritten FLE2 query.",
fieldname == safeContentLabel || fieldname.startsWith("$_internalFle"_sd));
fieldname != safeContentLabel && !fieldname.startsWith("$_internalFle"_sd));
} else if (e.type() == BSONType::String) {
auto val = e.valueStringData();
uassert(ErrorCodes::EncounteredFLEPayloadWhileRedacting,
"Encountered $__safeContent__ fieldpath, which indicates a rewritten FLE2 query.",
val == fieldpath);
val != fieldpath);
} else if (e.type() == BSONType::BinData && e.isBinData(BinDataType::Encrypt)) {
int len;
auto data = e.binData(len);
Expand Down Expand Up @@ -263,8 +264,104 @@ class LockedMetrics {
TelemetryStore::Partition _partitionLock;
};

/**
* Upon reading telemetry data, we redact some keys. This is the list. See
* TelemetryMetrics::redactKey().
*/
const stdx::unordered_set<std::string> kKeysToRedact = {"pipeline", "find"};

std::string sha256FieldNameHasher(const BSONElement& e) {
auto&& fieldName = e.fieldNameStringData();
auto hash = SHA256Block::computeHash({ConstDataRange(fieldName.rawData(), fieldName.size())});
return hash.toString().substr(0, 12);
}

std::string constantFieldNameHasher(const BSONElement& e) {
return {"###"};
}

/**
* Admittedly an abuse of the BSON redaction interface, we recognize FLE payloads here and avoid
* collecting telemetry for the query.
*/
std::string fleSafeFieldNameRedactor(const BSONElement& e) {
throwIfEncounteringFLEPayload(e);
// Ideally we would change interface to avoid copying here.
return e.fieldNameStringData().toString();
}

} // namespace

const BSONObj& TelemetryMetrics::redactKey(const BSONObj& key) const {
if (_redactedKey) {
return *_redactedKey;
}

auto redactionStrategy = ServerParameterSet::getNodeParameterSet()
->get<QueryTelemetryControl>(
"internalQueryConfigureTelemetryFieldNameRedactionStrategy")
->_data.get();

// The telemetry key is of the following form:
// { "<CMD_TYPE>": {...}, "namespace": "...", "applicationName": "...", ... }
//
// The part of the key we need to redact is the object in the <CMD_TYPE> element. In the case of
// an aggregate() command, it will look something like:
// > "pipeline" : [ { "$telemetry" : {} },
// { "$addFields" : { "x" : { "$someExpr" {} } } } ],
// We should preserve the top-level stage names in the pipeline but redact all field names of
// children.
//
// The find-specific key will look like so:
// > "find" : { "find" : "###", "filter" : { "_id" : { "$ne" : "###" } } },
// Again, we should preserve the top-level keys and redact all field names of children.
BSONObjBuilder redacted;
for (BSONElement e : key) {
if ((e.type() == Object || e.type() == Array) &&
kKeysToRedact.count(e.fieldNameStringData().toString()) == 1) {
auto redactor = [&](BSONObjBuilder subObj, const BSONObj& obj) {
for (BSONElement e2 : obj) {
if (e2.type() == Object) {
switch (redactionStrategy) {
case QueryTelemetryFieldNameRedactionStrategyEnum::
kSha256RedactionStrategy:
subObj.append(e2.fieldNameStringData(),
e2.Obj().redact(false, sha256FieldNameHasher));
break;
case QueryTelemetryFieldNameRedactionStrategyEnum::
kConstantRedactionStrategy:
subObj.append(e2.fieldNameStringData(),
e2.Obj().redact(false, constantFieldNameHasher));
break;
case QueryTelemetryFieldNameRedactionStrategyEnum::kNoRedactionStrategy:
subObj.append(e2.fieldNameStringData(), e2.Obj().redact(false));
break;
}
} else {
subObj.append(e2);
}
}
subObj.done();
};

// Now we're inside the <CMD_TYPE>:{} entry and want to preserve the top-level field
// names. If it's a [pipeline] array, we redact each element in isolation.
if (e.type() == Object) {
redactor(redacted.subobjStart(e.fieldNameStringData()), e.Obj());
} else {
BSONObjBuilder subArr = redacted.subarrayStart(e.fieldNameStringData());
for (BSONElement stage : e.Obj()) {
redactor(subArr.subobjStart(""), stage.Obj());
}
}
} else {
redacted.append(e);
}
}
_redactedKey = redacted.obj();
return *_redactedKey;
}

void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx) {
if (request.getEncryptionInformation()) {
return;
Expand All @@ -285,7 +382,8 @@ void registerAggRequest(const AggregateCommandRequest& request, OperationContext
for (auto&& stage : request.getPipeline()) {
auto el = stage.firstElement();
BSONObjBuilder stageBuilder = pipelineBuilder.subobjStart("stage"_sd);
stageBuilder.append(el.fieldNameStringData(), el.Obj().redact(false));
stageBuilder.append(el.fieldNameStringData(),
el.Obj().redact(false, fleSafeFieldNameRedactor));
stageBuilder.done();
}
pipelineBuilder.done();
Expand Down Expand Up @@ -340,7 +438,8 @@ void registerFindRequest(const FindCommandRequest& request,
auto findBson = request.toBSON({});
for (auto&& findEntry : findBson) {
if (findEntry.isABSONObj()) {
telemetryKey.append(findEntry.fieldNameStringData(), findEntry.Obj().redact(false));
telemetryKey.append(findEntry.fieldNameStringData(),
findEntry.Obj().redact(false, fleSafeFieldNameRedactor));
} else {
telemetryKey.append(findEntry.fieldNameStringData(), "###"_sd);
}
Expand Down
19 changes: 18 additions & 1 deletion mongo/src/mongo/db/query/telemetry.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ class TelemetryMetrics {
return builder.obj();
}

/**
* Redact a given telemetry key.
*/
const BSONObj& redactKey(const BSONObj& key) const;

/**
* Timestamp for when this query shape was added to the store. Set on construction.
*/
Expand All @@ -130,6 +135,12 @@ class TelemetryMetrics {
AggregatedMetric docsScanned;

AggregatedMetric keysScanned;

private:
/**
* We cache the redacted key the first time it's computed.
*/
mutable boost::optional<BSONObj> _redactedKey;
};

struct TelemetryPartitioner {
Expand All @@ -139,9 +150,15 @@ struct TelemetryPartitioner {
}
};

/**
* Average key size used to pad the metrics size. We store a cached redaction of the key in the
* TelemetryMetrics object.
*/
const size_t kAverageKeySize = 100;

struct ComputeEntrySize {
size_t operator()(const TelemetryMetrics& entry) {
return sizeof(TelemetryMetrics);
return sizeof(TelemetryMetrics) + kAverageKeySize;
}
};

Expand Down
16 changes: 16 additions & 0 deletions mongo/src/mongo/db/query/telemetry_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "mongo/base/status.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/query/partitioned_cache.h"
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/query/util/memory_util.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
Expand Down Expand Up @@ -82,3 +83,18 @@ const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater
telemetryStoreOnParamChangeUpdater =
ServiceContext::declareDecoration<std::unique_ptr<OnParamChangeUpdater>>();
} // namespace mongo::telemetry_util

namespace mongo {
void QueryTelemetryControl::append(OperationContext*,
BSONObjBuilder* b,
StringData name,
const boost::optional<TenantId>&) {
*b << name << QueryTelemetryFieldNameRedactionStrategy_serializer(_data.get());
}

Status QueryTelemetryControl::setFromString(StringData value, const boost::optional<TenantId>&) {
_data = QueryTelemetryFieldNameRedactionStrategy_parse(
IDLParserContext("internalQueryConfigureTelemetryFieldNameRedactionStrategy"), value);
return Status::OK();
}
} // namespace mongo

0 comments on commit c201a7b

Please sign in to comment.