From 10b89190bd62130ebd07c82d697feb0e056c2f5f Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Sat, 14 Aug 2021 17:31:46 +0200 Subject: [PATCH] Fix Issues in Netty4MessageChannelHandler (#75861) (#76531) Fixes a few rough edges in this class: * we need to always pass a flush call down the pipeline and not just conditionally if they apply to the message handler, otherwise we lose flushes e.g. when a channel becomes not-writable due to a write from off the event-loop that exceeds the outbound buffer size * this is suspected of causing recently observed intermittent and unexplained slow message writes (logged by the outbound slow logger) where a message became stuck until a subsequent message was sent (e.g. during periodic leader checks) * Pass size `0` messages down the pipeline instead of just resolving their promise to avoid unexpected behavior (though we don't make use of `0`-length writes as of today) * Avoid unnecessary flushes in queued-writes loop and only flush if the channel stops being writable * Release buffers on queued writes that we fail on channel close (not doing this wasn't causing bugs today because we release the underlying bytes elsewhere but could cause trouble later) Unfortunately, I was not able to reproduce the issue in the first point reliably as the timing is really tricky. I therefore tried to make this PR as short and uncontroversial as possible. I think there are possible further improvements here and this should have been caught by a test but it's not yet clear to me how to design a reliable reproducer here. 
--- .../netty4/Netty4MessageChannelHandler.java | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java index 1eae706ba9377..35d88842ec246 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4MessageChannelHandler.java @@ -99,9 +99,8 @@ public void channelWritabilityChanged(ChannelHandlerContext ctx) { @Override public void flush(ChannelHandlerContext ctx) { assert Transports.assertDefaultThreadContext(transport.getThreadPool().getThreadContext()); - Channel channel = ctx.channel(); - if (channel.isWritable() || channel.isActive() == false) { - doFlush(ctx); + if (doFlush(ctx) == false) { + ctx.flush(); } } @@ -113,16 +112,14 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { super.channelInactive(ctx); } - private void doFlush(ChannelHandlerContext ctx) { + private boolean doFlush(ChannelHandlerContext ctx) { assert ctx.executor().inEventLoop(); final Channel channel = ctx.channel(); if (channel.isActive() == false) { - if (currentWrite != null) { - currentWrite.promise.tryFailure(new ClosedChannelException()); - } failQueuedWrites(); - return; + return false; } + boolean needsFlush = true; while (channel.isWritable()) { if (currentWrite == null) { currentWrite = queuedWrites.poll(); @@ -131,11 +128,6 @@ private void doFlush(ChannelHandlerContext ctx) { break; } final WriteOperation write = currentWrite; - if (write.buf.readableBytes() == 0) { - write.promise.trySuccess(); - currentWrite = null; - continue; - } final int readableBytes = write.buf.readableBytes(); final int bufferSize = Math.min(readableBytes, 1 << 18); final int readerIndex = 
write.buf.readerIndex(); @@ -148,7 +140,8 @@ private void doFlush(ChannelHandlerContext ctx) { writeBuffer = write.buf; } final ChannelFuture writeFuture = ctx.write(writeBuffer); - if (sliced == false || write.buf.readableBytes() == 0) { + needsFlush = true; + if (sliced == false) { currentWrite = null; writeFuture.addListener(future -> { assert ctx.executor().inEventLoop(); @@ -166,18 +159,30 @@ private void doFlush(ChannelHandlerContext ctx) { } }); } - ctx.flush(); - if (channel.isActive() == false) { - failQueuedWrites(); - return; + if (channel.isWritable() == false) { + // try flushing to make channel writable again, loop will only continue if channel becomes writable again + ctx.flush(); + needsFlush = false; } } + if (needsFlush) { + ctx.flush(); + } + if (channel.isActive() == false) { + failQueuedWrites(); + } + return true; } private void failQueuedWrites() { + if (currentWrite != null) { + final WriteOperation current = currentWrite; + currentWrite = null; + current.failAsClosedChannel(); + } WriteOperation queuedWrite; while ((queuedWrite = queuedWrites.poll()) != null) { - queuedWrite.promise.tryFailure(new ClosedChannelException()); + queuedWrite.failAsClosedChannel(); } } @@ -191,5 +196,10 @@ private static final class WriteOperation { this.buf = buf; this.promise = promise; } + + void failAsClosedChannel() { + promise.tryFailure(new ClosedChannelException()); + buf.release(); + } } }