Skip to content

Commit

Permalink
Agent drain: implement first half.
Browse files Browse the repository at this point in the history
- Agent sends drain signal to Server.
- Server only logs it.
  • Loading branch information
jkh52 committed Mar 20, 2024
1 parent d302a0a commit f7a7f0c
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 10 deletions.
39 changes: 33 additions & 6 deletions cmd/agent/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ import (
"net"
"net/http"
"net/http/pprof"
"os"
"os/signal"
"runtime"
runpprof "runtime/pprof"
"strconv"
"strings"
"syscall"
"time"

"github.com/prometheus/client_golang/prometheus/promhttp"
Expand All @@ -49,8 +52,8 @@ func NewAgentCommand(a *Agent, o *options.GrpcProxyAgentOptions) *cobra.Command
Use: "agent",
Long: `A gRPC agent, Connects to the proxy and then allows traffic to be forwarded to it.`,
RunE: func(cmd *cobra.Command, args []string) error {
stopCh := make(chan struct{})
return a.Run(o, stopCh)
drainCh, stopCh := SetupSignalHandler()
return a.Run(o, drainCh, stopCh)
},
}

Expand All @@ -64,13 +67,13 @@ type Agent struct {
cs *agent.ClientSet
}

func (a *Agent) Run(o *options.GrpcProxyAgentOptions, stopCh <-chan struct{}) error {
func (a *Agent) Run(o *options.GrpcProxyAgentOptions, drainCh, stopCh <-chan struct{}) error {
o.Print()
if err := o.Validate(); err != nil {
return fmt.Errorf("failed to validate agent options with %v", err)
}

cs, err := a.runProxyConnection(o, stopCh)
cs, err := a.runProxyConnection(o, drainCh, stopCh)
if err != nil {
return fmt.Errorf("failed to run proxy connection with %v", err)
}
Expand All @@ -92,7 +95,31 @@ func (a *Agent) Run(o *options.GrpcProxyAgentOptions, stopCh <-chan struct{}) er
return nil
}

func (a *Agent) runProxyConnection(o *options.GrpcProxyAgentOptions, stopCh <-chan struct{}) (*agent.ClientSet, error) {
var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}

func SetupSignalHandler() (drainCh, stopCh <-chan struct{}) {
drain := make(chan struct{})
stop := make(chan struct{})
c := make(chan os.Signal, 2)
signal.Notify(c, shutdownSignals...)
labels := runpprof.Labels(
"core", "signalHandler",
)
go runpprof.Do(context.Background(), labels, func(context.Context) { handleSignals(c, drain, stop) })

return drain, stop
}

func handleSignals(signalCh chan os.Signal, drainCh, stopCh chan struct{}) {
s := <-signalCh
klog.V(2).InfoS("Received first signal", "signal", s)
close(drainCh)
s = <-signalCh
klog.V(2).InfoS("Received second signal", "signal", s)
close(stopCh)
}

func (a *Agent) runProxyConnection(o *options.GrpcProxyAgentOptions, drainCh, stopCh <-chan struct{}) (*agent.ClientSet, error) {
var tlsConfig *tls.Config
var err error
if tlsConfig, err = util.GetClientTLSConfig(o.CaCert, o.AgentCert, o.AgentKey, o.ProxyServerHost, o.AlpnProtos); err != nil {
Expand All @@ -106,7 +133,7 @@ func (a *Agent) runProxyConnection(o *options.GrpcProxyAgentOptions, stopCh <-ch
}),
}
cc := o.ClientSetConfig(dialOptions...)
cs := cc.NewAgentClientSet(stopCh)
cs := cc.NewAgentClientSet(drainCh, stopCh)
cs.Serve()

return cs, nil
Expand Down
20 changes: 19 additions & 1 deletion pkg/agent/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,11 @@ type Client struct {
address string
opts []grpc.DialOption
conn *grpc.ClientConn
stopCh chan struct{}

drainCh <-chan struct{}
drainOnce sync.Once
stopCh chan struct{}

// locks
sendLock sync.Mutex
recvLock sync.Mutex
Expand All @@ -158,6 +162,7 @@ func newAgentClient(address, agentID, agentIdentifiers string, cs *ClientSet, op
agentIdentifiers: agentIdentifiers,
opts: opts,
probeInterval: cs.probeInterval,
drainCh: cs.drainCh,
stopCh: make(chan struct{}),
serviceAccountTokenPath: cs.serviceAccountTokenPath,
connManager: newConnectionManager(),
Expand Down Expand Up @@ -325,6 +330,19 @@ func (a *Client) Serve() {
case <-a.stopCh:
klog.V(2).InfoS("stop agent client.")
return
case <-a.drainCh:
a.drainOnce.Do(func() {
klog.V(2).InfoS("drain agent client", "serverID", a.serverID, "agentID", a.agentID)
drainPkt := &client.Packet{
Type: client.PacketType_DRAIN,
Payload: &client.Packet_Drain{
Drain: &client.Drain{},
},
}
if err := a.Send(drainPkt); err != nil {
klog.ErrorS(err, "drain failure", "")
}
})
default:
}

Expand Down
34 changes: 34 additions & 0 deletions pkg/agent/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,40 @@ func TestFailedSend_DialResp_GRPC(t *testing.T) {
}()
}

func TestDrain(t *testing.T) {
var stream agent.AgentService_ConnectClient
drainCh := make(chan struct{})
stopCh := make(chan struct{})
cs := &ClientSet{
clients: make(map[string]*Client),
drainCh: drainCh,
stopCh: stopCh,
}
testClient := &Client{
connManager: newConnectionManager(),
drainCh: drainCh,
stopCh: stopCh,
cs: cs,
}
testClient.stream, stream = pipe()

// Start agent
go testClient.Serve()
defer close(stopCh)

// Simulate pod first shutdown signal
close(drainCh)

// Expect to receive DRAIN packet from (Agent) Client
pkt, err := stream.Recv()
if err != nil {
t.Fatal(err)
}
if pkt.Type != client.PacketType_DRAIN {
t.Errorf("expect PacketType_DRAIN; got %v", pkt.Type)
}
}

// fakeStream implements AgentService_ConnectClient
type fakeStream struct {
grpc.ClientStream
Expand Down
5 changes: 4 additions & 1 deletion pkg/agent/clientset.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ type ClientSet struct {
dialOptions []grpc.DialOption
// file path contains service account token
serviceAccountTokenPath string
// channel to signal that the agent is pending termination.
drainCh <-chan struct{}
// channel to signal shutting down the client set. Primarily for test.
stopCh <-chan struct{}

Expand Down Expand Up @@ -141,7 +143,7 @@ type ClientSetConfig struct {
SyncForever bool
}

func (cc *ClientSetConfig) NewAgentClientSet(stopCh <-chan struct{}) *ClientSet {
func (cc *ClientSetConfig) NewAgentClientSet(drainCh, stopCh <-chan struct{}) *ClientSet {
return &ClientSet{
clients: make(map[string]*Client),
agentID: cc.AgentID,
Expand All @@ -154,6 +156,7 @@ func (cc *ClientSetConfig) NewAgentClientSet(stopCh <-chan struct{}) *ClientSet
serviceAccountTokenPath: cc.ServiceAccountTokenPath,
warnOnChannelLimit: cc.WarnOnChannelLimit,
syncForever: cc.SyncForever,
drainCh: drainCh,
stopCh: stopCh,
}
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,8 @@ func (s *ProxyServer) serveRecvBackend(backend Backend, agentID string, recvCh <
klog.V(5).InfoS("CLOSE_RSP sent to frontend", "connectionID", resp.ConnectID)
}

case client.PacketType_DRAIN:
klog.V(2).InfoS("agent is draining", "agentID", agentID)
default:
klog.V(5).InfoS("Ignoring unrecognized packet from backend", "packet", pkt, "agentID", agentID)
}
Expand Down
26 changes: 24 additions & 2 deletions tests/framework/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"path/filepath"
"strconv"
"sync"
"syscall"
"testing"
"time"

Expand All @@ -53,6 +54,7 @@ type AgentRunner interface {
type Agent interface {
GetConnectedServerCount() (int, error)
Ready() bool
Drain()
Stop()
Metrics() metricstest.AgentTester
}
Expand All @@ -66,9 +68,10 @@ func (*InProcessAgentRunner) Start(t testing.TB, opts AgentOpts) (Agent, error)
}

ctx, cancel := context.WithCancel(context.Background())
drainCh := make(chan struct{})
stopCh := make(chan struct{})
go func() {
if err := a.Run(o, stopCh); err != nil {
if err := a.Run(o, drainCh, stopCh); err != nil {
log.Printf("ERROR running agent: %v", err)
cancel()
}
Expand All @@ -84,6 +87,7 @@ func (*InProcessAgentRunner) Start(t testing.TB, opts AgentOpts) (Agent, error)

pa := &inProcessAgent{
client: a.ClientSet(),
drainCh: drainCh,
stopCh: stopCh,
healthAddr: healthAddr,
}
Expand All @@ -94,12 +98,21 @@ func (*InProcessAgentRunner) Start(t testing.TB, opts AgentOpts) (Agent, error)
type inProcessAgent struct {
client *agent.ClientSet

drainOnce sync.Once
drainCh chan struct{}

stopOnce sync.Once
stopCh chan struct{}

healthAddr string
}

func (a *inProcessAgent) Drain() {
a.drainOnce.Do(func() {
close(a.drainCh)
})
}

func (a *inProcessAgent) Stop() {
a.stopOnce.Do(func() {
close(a.stopCh)
Expand Down Expand Up @@ -160,7 +173,16 @@ type externalAgent struct {
cmd *exec.Cmd
metrics *metricstest.Tester

stopOnce sync.Once
drainOnce sync.Once
stopOnce sync.Once
}

func (a *externalAgent) Drain() {
a.drainOnce.Do(func() {
if err := a.cmd.Process.Signal(syscall.SIGTERM); err != nil {
log.Fatalf("Error draining agent process: %v", err)
}
})
}

func (a *externalAgent) Stop() {
Expand Down
49 changes: 49 additions & 0 deletions tests/proxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,55 @@ func TestFailedDial_HTTPCONN(t *testing.T) {
resetAllMetrics() // For clean shutdown.
}

func TestProxyHandle_AfterDrain(t *testing.T) {
expectCleanShutdown(t)

server := httptest.NewServer(newEchoServer("hello"))
defer server.Close()

ps := runGRPCProxyServer(t)
defer ps.Stop()

a := runAgent(t, ps.AgentAddr())
defer a.Stop()
waitForConnectedServerCount(t, 1, a)

// Drain agent
a.Drain()

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
tunnel, err := createSingleUseGrpcTunnel(ctx, ps.FrontAddr())
if err != nil {
t.Fatal(err)
}

c := &http.Client{
Transport: &http.Transport{
DialContext: tunnel.DialContext,
},
}

req, err := http.NewRequest("GET", server.URL, nil)
if err != nil {
t.Fatal(err)
}

r, err := c.Do(req)
if err != nil {
t.Fatal(err)
}
defer r.Body.Close()

data, err := io.ReadAll(r.Body)
if err != nil {
t.Fatal(err)
}
if string(data) != "hello" {
t.Errorf("expect %v; got %v", "hello", string(data))
}
}

func runGRPCProxyServer(t testing.TB) framework.ProxyServer {
return runGRPCProxyServerWithServerCount(t, 1)
}
Expand Down

0 comments on commit f7a7f0c

Please sign in to comment.