Skip to content

Commit

Permalink
Revert "ext_proc: Ext proc half close on destroy and defer reset till…
Browse files Browse the repository at this point in the history
… trailers received. (#37083)" (#37639)

This PR is **suspected** of causing instability in prod. The cause is not yet fully diagnosed, but it is reverted as a safety measure.

Risk Level: Low
Testing: Unit tests
Docs Changes: N/A
Release Notes: N/A
Platform Specific Features: N/A

Signed-off-by: Yan Avlasov <[email protected]>
  • Loading branch information
yanavlasov authored Dec 20, 2024
1 parent 38094df commit 66cc217
Show file tree
Hide file tree
Showing 13 changed files with 89 additions and 212 deletions.
8 changes: 1 addition & 7 deletions source/extensions/filters/http/ext_proc/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,7 @@ class ExternalProcessorStream : public StreamBase {
virtual void send(envoy::service::ext_proc::v3::ProcessingRequest&& request,
bool end_stream) PURE;
// Idempotent close. Return true if it actually closed.
// Sends a half-close from the client side.
// No further messages can be sent after this, but gRPC server may still send
// messages back.
virtual bool closeLocalStream() PURE;
virtual bool remoteClosed() const PURE;
virtual bool localClosed() const PURE;
virtual void resetStream() PURE;
virtual bool close() PURE;
virtual const StreamInfo::StreamInfo& streamInfo() const PURE;
virtual StreamInfo::StreamInfo& streamInfo() PURE;
virtual void notifyFilterDestroy() PURE;
Expand Down
12 changes: 5 additions & 7 deletions source/extensions/filters/http/ext_proc/client_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,16 @@ void ExternalProcessorStreamImpl::send(envoy::service::ext_proc::v3::ProcessingR
stream_.sendMessage(std::move(request), end_stream);
}

bool ExternalProcessorStreamImpl::closeLocalStream() {
if (!local_closed_) {
bool ExternalProcessorStreamImpl::close() {
if (!stream_closed_) {
ENVOY_LOG(debug, "Closing gRPC stream");
// Unregister the watermark callbacks, if any exist (e.g., filter is not destroyed yet)
if (grpc_side_stream_flow_control_ && callbacks_.has_value()) {
stream_.removeWatermarkCallbacks();
}
stream_.closeStream();
local_closed_ = true;
stream_closed_ = true;
stream_.resetStream();
return true;
}
return false;
Expand All @@ -94,10 +95,7 @@ void ExternalProcessorStreamImpl::onReceiveTrailingMetadata(Http::ResponseTraile
void ExternalProcessorStreamImpl::onRemoteClose(Grpc::Status::GrpcStatus status,
const std::string& message) {
ENVOY_LOG(debug, "gRPC stream closed remotely with status {}: {}", status, message);
// Also mark the client stream as closed: the underlying http async stream is
// closed and cleaned up already after processing trailers.
local_closed_ = true;
remote_closed_ = true;
stream_closed_ = true;

if (!callbacks_.has_value()) {
ENVOY_LOG(debug, "Underlying filter object has been destroyed.");
Expand Down
23 changes: 3 additions & 20 deletions source/extensions/filters/http/ext_proc/client_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class ExternalProcessorStreamImpl : public ExternalProcessorStream,
void send(ProcessingRequest&& request, bool end_stream) override;
// Close the stream. This is idempotent and will return true if we
// actually closed it.
bool closeLocalStream() override;
bool close() override;

void notifyFilterDestroy() override {
// When the filter object is being destroyed, `callbacks_` (which is a OptRef to filter object)
Expand All @@ -65,7 +65,7 @@ class ExternalProcessorStreamImpl : public ExternalProcessorStream,

// Unregister the watermark callbacks(if any) to prevent access of filter callbacks after
// the filter object is destroyed.
if (!local_closed_) {
if (!stream_closed_) {
// Remove the parent stream info to avoid a dangling reference.
stream_.streamInfo().clearParentStreamInfo();
if (grpc_side_stream_flow_control_) {
Expand All @@ -74,23 +74,9 @@ class ExternalProcessorStreamImpl : public ExternalProcessorStream,
}
}

// Returns true if the stream is closed from the Server/remote side.
bool remoteClosed() const override { return remote_closed_; }

// Returns true if the stream is closed from the Envoy/client side.
bool localClosed() const override { return local_closed_; }

// AsyncStreamCallbacks
void onReceiveMessage(ProcessingResponsePtr&& message) override;

void resetStream() override {
if (!remoteClosed()) {
stream_.resetStream();
// Mark the stream as closed from the Server/remote side.
remote_closed_ = true;
}
}

// RawAsyncStreamCallbacks
void onCreateInitialMetadata(Http::RequestHeaderMap& metadata) override;
void onReceiveInitialMetadata(Http::ResponseHeaderMapPtr&& metadata) override;
Expand All @@ -116,10 +102,7 @@ class ExternalProcessorStreamImpl : public ExternalProcessorStream,
Grpc::AsyncClient<ProcessingRequest, ProcessingResponse> client_;
Grpc::AsyncStream<ProcessingRequest> stream_;
Http::AsyncClient::ParentContext grpc_context_;
// Whether stream is closed from Envoy/client side.
bool local_closed_ = false;
// Whether the stream is closed from the Server/remote side.
bool remote_closed_ = false;
bool stream_closed_ = false;
// Boolean flag initiated by runtime flag.
const bool grpc_side_stream_flow_control_;
};
Expand Down
90 changes: 31 additions & 59 deletions source/extensions/filters/http/ext_proc/ext_proc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -424,37 +424,22 @@ void Filter::closeStream() {
if (!config_->grpcService().has_value()) {
return;
}
// Stream not opened or already cleaned up.
if (stream_ == nullptr) {
ENVOY_LOG(debug, "Stream already closed or not opened yet.");
return;
}

if (!stream_->localClosed()) {
if (stream_) {
ENVOY_LOG(debug, "Calling close on stream");
if (stream_->closeLocalStream()) {
if (stream_->close()) {
stats_.streams_closed_.inc();
}
config_->threadLocalStreamManager().erase(stream_);
stream_ = nullptr;
} else {
ENVOY_LOG(debug, "Stream already closed");
}
}

void Filter::deferredResetStream() {
ENVOY_LOG(debug, "Calling deferred cleanup on stream");
if (stream_ == nullptr) {
ENVOY_LOG(debug, "Stream already reset or not opened yet.");
return;
}
void Filter::deferredCloseStream() {
ENVOY_LOG(debug, "Calling deferred close on stream");
config_->threadLocalStreamManager().deferredErase(stream_, filter_callbacks_->dispatcher());
stream_ = nullptr;
}

void Filter::cleanupStream() {
ENVOY_LOG(debug, "Calling cleanup stream");
if (stream_ != nullptr) {
stream_->resetStream();
config_->threadLocalStreamManager().erase(stream_);
stream_ = nullptr;
}
}

void Filter::onDestroy() {
Expand All @@ -469,30 +454,23 @@ void Filter::onDestroy() {
client_->cancel();
return;
}
if (stream_ != nullptr) {
if (!stream_->localClosed()) {
// Perform immediate close on the stream otherwise.
closeStream();
}
// Defer the resource cleanup if the remote is not closed (no trailers
// received).
// This means essentially not sending a reset to the server, which
// translates into a client CANCELED error on the server side.
if (config_->observabilityMode() || !stream_->remoteClosed()) {
// In observability mode where the main stream processing and side stream
// processing are asynchronous, it is possible that filter instance is
// destroyed before the side stream request arrives at ext_proc server. In
// order to prevent the data loss in this case, side stream resetting is
// deferred upon filter destruction with a timer.

// First, release the referenced filter resource.

if (config_->observabilityMode()) {
// In observability mode where the main stream processing and side stream processing are
// asynchronous, it is possible that filter instance is destroyed before the side stream request
// arrives at ext_proc server. In order to prevent the data loss in this case, side stream
// closure is deferred upon filter destruction with a timer.

// First, release the referenced filter resource.
if (stream_ != nullptr) {
stream_->notifyFilterDestroy();
// Second, perform stream deferred closure.
deferredResetStream();
} else {
// Server closed the stream, so we can cleanup the stream immediately.
cleanupStream();
}

// Second, perform stream deferred closure.
deferredCloseStream();
} else {
// Perform immediate close on the stream otherwise.
closeStream();
}
}

Expand Down Expand Up @@ -1353,7 +1331,6 @@ void Filter::onGrpcError(Grpc::Status::GrpcStatus status) {
// make sure that they do not fire now.
onFinishProcessorCalls(status);
closeStream();
cleanupStream();
ImmediateResponse errorResponse;
errorResponse.mutable_status()->set_code(StatusCode::InternalServerError);
errorResponse.set_details(absl::StrFormat("%s_gRPC_error_%i", ErrorPrefix, status));
Expand All @@ -1365,14 +1342,10 @@ void Filter::onGrpcClose() {
ENVOY_LOG(debug, "Received gRPC stream close");

processing_complete_ = true;
stats_.streams_closed_.inc();
// Successful close. We can ignore the stream for the rest of our request
// and response processing.
closeStream();
// ExternalProcessorStreamImpl::onRemoteClose will call onGrpcClose in either
// happy path or error path.
// This will mark remote_closed_ as closed.
cleanupStream();

clearAsyncState();
}

Expand All @@ -1387,15 +1360,13 @@ void Filter::onMessageTimeout() {
// the external processor for the rest of the request.
processing_complete_ = true;
closeStream();
cleanupStream();
stats_.failure_mode_allowed_.inc();
clearAsyncState();

} else {
// Return an error and stop processing the current stream.
processing_complete_ = true;
closeStream();
cleanupStream();
decoding_state_.onFinishProcessorCall(Grpc::Status::DeadlineExceeded);
encoding_state_.onFinishProcessorCall(Grpc::Status::DeadlineExceeded);
ImmediateResponse errorResponse;
Expand Down Expand Up @@ -1537,12 +1508,13 @@ void Filter::mergePerRouteConfig() {
}
}

void DeferredDeletableStream::cleanupStreamOnTimer() {
void DeferredDeletableStream::closeStreamOnTimer() {
// Close the stream.
if (stream_ != nullptr) {
ENVOY_LOG(debug, "Resetting the stream.");
// After timeout if still no trailers received, reset the stream.
stream_->resetStream();
if (stream_) {
ENVOY_LOG(debug, "Closing the stream");
if (stream_->close()) {
stats.streams_closed_.inc();
}
// Erase this entry from the map; this will also reset the stream_ pointer.
parent.erase(stream_.get());
} else {
Expand All @@ -1553,7 +1525,7 @@ void DeferredDeletableStream::cleanupStreamOnTimer() {
// In the deferred closure mode, stream closure is deferred upon filter destruction, with a timer
// to prevent unbounded resource usage growth.
void DeferredDeletableStream::deferredClose(Envoy::Event::Dispatcher& dispatcher) {
derferred_close_timer = dispatcher.createTimer([this] { cleanupStreamOnTimer(); });
derferred_close_timer = dispatcher.createTimer([this] { closeStreamOnTimer(); });
derferred_close_timer->enableTimer(std::chrono::milliseconds(deferred_close_timeout));
}

Expand Down
13 changes: 3 additions & 10 deletions source/extensions/filters/http/ext_proc/ext_proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,7 @@ struct DeferredDeletableStream : public Logger::Loggable<Logger::Id::ext_proc> {
deferred_close_timeout(timeout) {}

void deferredClose(Envoy::Event::Dispatcher& dispatcher);
// After a timer timeouts, reset the stream, this essentially reset the
// underlying gRPC stream. Gives remote grpc server a CANCELED signal, and
// ignored any further messages/callbacks from the server.
void cleanupStreamOnTimer();
void closeStreamOnTimer();

ExternalProcessorStreamPtr stream_;
ThreadLocalStreamManager& parent;
Expand Down Expand Up @@ -194,6 +191,7 @@ class ThreadLocalStreamManager : public Envoy::ThreadLocal::ThreadLocalObject {
if (it == stream_manager_.end()) {
return;
}

it->second->deferredClose(dispatcher);
}

Expand Down Expand Up @@ -462,11 +460,6 @@ class Filter : public Logger::Loggable<Logger::Id::ext_proc>,
void mergePerRouteConfig();
StreamOpenState openStream();
void closeStream();
// Erases the stream from the threadLocalStreamManager, and reset the
// stream_ pointer and the underlying gRPC stream.
// This is called when the stream needs to be cleaned up, due to remote close
// event, or local stream timeouts.
void cleanupStream();

void onFinishProcessorCalls(Grpc::Status::GrpcStatus call_status);
void clearAsyncState();
Expand Down Expand Up @@ -504,7 +497,7 @@ class Filter : public Logger::Loggable<Logger::Id::ext_proc>,
bool end_stream);
Http::FilterDataStatus sendDataInObservabilityMode(Buffer::Instance& data, ProcessorState& state,
bool end_stream);
void deferredResetStream();
void deferredCloseStream();

envoy::service::ext_proc::v3::ProcessingRequest
buildHeaderRequest(ProcessorState& state, Http::RequestOrResponseHeaderMap& headers,
Expand Down
2 changes: 1 addition & 1 deletion test/extensions/filters/http/ext_proc/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ envoy_extension_cc_test(

envoy_extension_cc_test(
name = "filter_test",
size = "medium",
size = "small",
srcs = ["filter_test.cc"],
copts = select({
"//bazel:windows_x86_64": [],
Expand Down
Loading

0 comments on commit 66cc217

Please sign in to comment.