-
Notifications
You must be signed in to change notification settings - Fork 23
Fix client-outputhost fd leak on consumer.close() #8
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,10 +26,10 @@ import ( | |
"sync/atomic" | ||
"time" | ||
|
||
"github.com/uber/cherami-thrift/.generated/go/cherami" | ||
"github.com/uber/cherami-client-go/common" | ||
"github.com/uber/cherami-client-go/common/metrics" | ||
"github.com/uber/cherami-client-go/stream" | ||
"github.com/uber/cherami-thrift/.generated/go/cherami" | ||
|
||
"github.com/uber-common/bark" | ||
"github.com/uber/tchannel-go/thrift" | ||
|
@@ -38,7 +38,6 @@ import ( | |
|
||
type ( | ||
outputHostConnection struct { | ||
outputHostClient cherami.TChanBOut | ||
ackClient cherami.TChanBOut | ||
wsConnector WSConnector | ||
path string | ||
|
@@ -74,7 +73,7 @@ const ( | |
ackBatchDelay = time.Second / 10 | ||
) | ||
|
||
func newOutputHostConnection(client cherami.TChanBOut, ackClient cherami.TChanBOut, wsConnector WSConnector, | ||
func newOutputHostConnection(ackClient cherami.TChanBOut, wsConnector WSConnector, | ||
path, consumerGroupName string, options *ClientOptions, deliveryCh chan<- Delivery, | ||
reconfigureCh chan<- reconfigureInfo, connKey string, protocol cherami.Protocol, | ||
prefetchSize int32, logger bark.Logger, reporter metrics.Reporter) *outputHostConnection { | ||
|
@@ -90,7 +89,6 @@ func newOutputHostConnection(client cherami.TChanBOut, ackClient cherami.TChanBO | |
return &outputHostConnection{ | ||
connKey: connKey, | ||
protocol: protocol, | ||
outputHostClient: client, | ||
ackClient: ackClient, | ||
wsConnector: wsConnector, | ||
path: path, | ||
|
@@ -161,7 +159,7 @@ func (conn *outputHostConnection) close() { | |
} | ||
|
||
close(conn.closeChannel) | ||
|
||
conn.closeAcksBatchCh() // necessary to shutdown writeAcksPump within the connection | ||
atomic.StoreInt32(&conn.closed, 1) | ||
conn.logger.Info("Output host connection closed.") | ||
} | ||
|
@@ -175,7 +173,23 @@ func (conn *outputHostConnection) isClosed() bool { | |
return atomic.LoadInt32(&conn.closed) != 0 | ||
} | ||
|
||
// drainReadPipe reads and discards all messages on | ||
// the outputHostStream until it encounters | ||
// a read stream error | ||
func (conn *outputHostConnection) drainReadPipe() { | ||
for { | ||
if _, err := conn.outputHostStream.Read(); err != nil { | ||
return | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, should we try to write to the deliveryCh in a non-blocking way? That way, we can make a best-effort attempt to deliver some messages if the application is still processing, and we won't just drop the tail in case, say, the server is being restarted. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @aravindvs - The trouble is that we close the ack tchannel immediately after calling consumer.close(). So, even if you give the app the opportunity to process them, it won't be able to ack/nack them; all that would do is delay the shutdown. @samar & I discussed a proposal for supporting graceful shutdown on the client. It would involve adding another API, something like client.initiateClose(). When the app calls this, the library will drain the read channel and enqueue everything to the delivery channel (and subsequently also close the delivery channel). The closing of the delivery channel will be the signal to the app indicating EOF. The app then acks/nacks all of them and finally calls client.close(). This patch doesn't address the clean shutdown. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ah.. ok.. makes sense.. |
||
} | ||
} | ||
|
||
func (conn *outputHostConnection) readMessagesPump() { | ||
|
||
defer func() { | ||
conn.logger.Info("readMessagesPump done") | ||
}() | ||
|
||
var localCredits int32 | ||
for { | ||
conn.reporter.UpdateGauge(metrics.ConsumeLocalCredits, nil, int64(localCredits)) | ||
|
@@ -193,7 +207,6 @@ func (conn *outputHostConnection) readMessagesPump() { | |
if err != nil { | ||
// Error reading from stream. Time to close and bail out. | ||
conn.logger.Infof("Error reading OutputHost Message Stream: %v", err) | ||
|
||
// Stream is closed. Close the connection and bail out | ||
conn.close() | ||
return | ||
|
@@ -203,7 +216,15 @@ func (conn *outputHostConnection) readMessagesPump() { | |
conn.reporter.IncCounter(metrics.ConsumeMessageRate, nil, 1) | ||
msg := cmd.Message | ||
delivery := newDelivery(msg, conn) | ||
conn.deliveryCh <- delivery | ||
|
||
select { | ||
case conn.deliveryCh <- delivery: | ||
case <-conn.closeChannel: | ||
conn.logger.Info("close signal received, initiating readPump drain") | ||
conn.drainReadPipe() | ||
return | ||
} | ||
|
||
localCredits++ | ||
} else if cmd.GetType() == cherami.OutputHostCommandType_RECONFIGURE { | ||
conn.reporter.IncCounter(metrics.ConsumeReconfigureRate, nil, 1) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not related to this change, but using atomic.LoadInt32 here is unnecessary: there is already a lock held on entering close.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
samarabbas, isOpened() and isClosed() access the member without holding the lock. So, leaving as is.