From 12464427eb1800ccdd5b2978865de08174e2ba5e Mon Sep 17 00:00:00 2001 From: Donatas Abraitis Date: Tue, 2 Feb 2021 22:50:32 +0200 Subject: [PATCH 1/2] Teardown tunnel automatically if peer's certificate expired Handle this with periodic keepalive ticks. This is needed to avoid hanging a connection even peer's certificate expired. In case you use short-lived certificates (let's say 2 hours), current behavior is a bit wrong, because the tunnel stays UP and RUNNING fine unless restarted nebula service. This is the log from how it's reflected. ``` time="2021-05-12T10:15:51Z" level=debug msg="Tunnel status" tunnelCheck="map[method:passive state:alive]" vpnIp=172.17.90.241 time="2021-05-12T10:15:59Z" level=debug msg="Tunnel status" tunnelCheck="map[method:passive state:alive]" vpnIp=172.17.90.241 time="2021-05-12T10:16:06Z" level=debug msg="Invalid certificate status" certName=ton31337 vpnIp=172.17.90.241 time="2021-05-12T10:16:06Z" level=debug msg="Tunnel status" tunnelCheck="map[method:passive state:alive]" vpnIp=172.17.90.241 time="2021-05-12T10:16:06Z" level=debug msg="Tunnel status" certName=ton31337 tunnelCheck="map[method:active state:testing]" vpnIp=172.17.90.241 time="2021-05-12T10:16:20Z" level=debug msg="Tunnel status" tunnelCheck="map[method:active state:alive]" vpnIp=172.17.90.241 time="2021-05-12T10:16:20Z" level=info msg="Tunnel status" certName=ton31337 tunnelCheck="map[method:active state:dead]" vpnIp=172.17.90.241 time="2021-05-12T10:16:20Z" level=debug msg="deleting 172.17.90.241 from lighthouse." time="2021-05-12T10:16:20Z" level=debug msg="Hostmap hostInfo deleted" hostMap="map[indexNumber:3347248980 mapName:main mapTotalSize:844 remoteIndexNumber:3605974939 vpnIp:172.17.90.241]" ``` Signed-off-by: Donatas Abraitis --- connection_manager.go | 82 +++++++++++++++++++++++++-------- connection_manager_test.go | 94 ++++++++++++++++++++++++++++++++++++++ examples/config.yml | 2 + interface.go | 3 ++ main.go | 1 + 5 files changed, 164 insertions(+), 18 deletions(-) diff --git a/connection_manager.go b/connection_manager.go index db5827415..11892c542 100644 --- a/connection_manager.go +++ b/connection_manager.go @@ -5,6 +5,7 @@ import ( "time" "github.com/sirupsen/logrus" + "github.com/slackhq/nebula/cert" ) // TODO: incount and outcount are intended as a shortcut to locking the mutexes for every single packet @@ -153,6 +154,22 @@ func (n *connectionManager) Run() { } } +// Check if peer's certificate is not expired or invalid. +func (n *connectionManager) checkToDisconnect(now time.Time, hostinfo *HostInfo) (bool, *cert.NebulaCertificate) { + if !n.intf.disconnectInvalid { + return false, nil + } + + if remoteCert := hostinfo.GetCert(); remoteCert != nil { + valid, _ := remoteCert.Verify(now, n.intf.caPool) + if !valid { + return true, remoteCert + } + } + + return false, nil +} + func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte) { n.TrafficTimer.advance(now) for { @@ -166,7 +183,30 @@ func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte) // Check for traffic coming back in from this host. traf := n.CheckIn(vpnIP) - // If we saw incoming packets from this ip, just return + hostinfo, err := n.hostMap.QueryVpnIP(vpnIP) + if err != nil { + n.l.Debugf("Not found in hostmap: %s", IntIp(vpnIP)) + + if !n.intf.disconnectInvalid { + n.ClearIP(vpnIP) + n.ClearPendingDeletion(vpnIP) + continue + } + } + + // If we set `pki.disconnect_invalid` and peer's cert is invalid, + // expired, etc. - disconnect immediately. + disconnect, remoteCert := n.checkToDisconnect(now, hostinfo) + if disconnect { + n.l.WithField("vpnIp", IntIp(vpnIP)). + WithField("certName", remoteCert.Details.Name). + Info("Invalid certificate status") + n.intf.closeTunnel(hostinfo, false) + continue + } + + // If we saw an incoming packets from this ip and peer's certificate is not + // expired, just ignore. if traf { if n.l.Level >= logrus.DebugLevel { n.l.WithField("vpnIp", IntIp(vpnIP)). @@ -178,15 +218,6 @@ func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte) continue } - // If we didn't we may need to probe or destroy the conn - hostinfo, err := n.hostMap.QueryVpnIP(vpnIP) - if err != nil { - n.l.Debugf("Not found in hostmap: %s", IntIp(vpnIP)) - n.ClearIP(vpnIP) - n.ClearPendingDeletion(vpnIP) - continue - } - hostinfo.logger(n.l). WithField("tunnelCheck", m{"state": "testing", "method": "active"}). Debug("Tunnel status") @@ -213,22 +244,37 @@ func (n *connectionManager) HandleDeletionTick(now time.Time) { vpnIP := ep.(uint32) - // If we saw incoming packets from this ip, just return + hostinfo, err := n.hostMap.QueryVpnIP(vpnIP) + if err != nil { + n.l.Debugf("Not found in hostmap: %s", IntIp(vpnIP)) + + if !n.intf.disconnectInvalid { + n.ClearIP(vpnIP) + n.ClearPendingDeletion(vpnIP) + continue + } + } + + // If we set `pki.disconnect_invalid` and peer's cert is invalid, + // expired, etc. - disconnect immediately. + disconnect, _ := n.checkToDisconnect(now, hostinfo) + if disconnect { + n.l.WithField("vpnIp", IntIp(vpnIP)). + Info("Invalid certificate status") + n.intf.closeTunnel(hostinfo, false) + continue + } + + // If we saw an incoming packets from this ip and peer's certificate is not + // expired, just ignore. traf := n.CheckIn(vpnIP) if traf { n.l.WithField("vpnIp", IntIp(vpnIP)). WithField("tunnelCheck", m{"state": "alive", "method": "active"}). Debug("Tunnel status") - n.ClearIP(vpnIP) - n.ClearPendingDeletion(vpnIP) - continue - } - hostinfo, err := n.hostMap.QueryVpnIP(vpnIP) - if err != nil { n.ClearIP(vpnIP) n.ClearPendingDeletion(vpnIP) - n.l.Debugf("Not found in hostmap: %s", IntIp(vpnIP)) continue } diff --git a/connection_manager_test.go b/connection_manager_test.go index d88aed264..05b706abd 100644 --- a/connection_manager_test.go +++ b/connection_manager_test.go @@ -1,6 +1,8 @@ package nebula import ( + "crypto/ed25519" + "crypto/rand" "net" "testing" "time" @@ -148,3 +150,95 @@ func Test_NewConnectionManagerTest2(t *testing.T) { assert.Contains(t, nc.hostMap.Hosts, vpnIP) } + +// Check if we can disconnect the peer. +// Validate if the peer's certificate is invalid (expired, etc.) +// Disconnect only if disconnectInvalid: true is set. +func Test_NewConnectionManagerTest_DisconnectInvalid(t *testing.T) { + now := time.Now() + l := NewTestLogger() + ipNet := net.IPNet{ + IP: net.IPv4(172, 1, 1, 2), + Mask: net.IPMask{255, 255, 255, 0}, + } + _, vpncidr, _ := net.ParseCIDR("172.1.1.1/24") + _, localrange, _ := net.ParseCIDR("10.1.1.1/24") + preferredRanges := []*net.IPNet{localrange} + hostMap := NewHostMap(l, "test", vpncidr, preferredRanges) + + // Generate keys for CA and peer's cert. + pubCA, privCA, _ := ed25519.GenerateKey(rand.Reader) + caCert := cert.NebulaCertificate{ + Details: cert.NebulaCertificateDetails{ + Name: "ca", + NotBefore: now, + NotAfter: now.Add(1 * time.Hour), + IsCA: true, + PublicKey: pubCA, + }, + } + caCert.Sign(privCA) + ncp := &cert.NebulaCAPool{ + CAs: cert.NewCAPool().CAs, + } + ncp.CAs["ca"] = &caCert + + pubCrt, _, _ := ed25519.GenerateKey(rand.Reader) + peerCert := cert.NebulaCertificate{ + Details: cert.NebulaCertificateDetails{ + Name: "host", + Ips: []*net.IPNet{&ipNet}, + Subnets: []*net.IPNet{}, + NotBefore: now, + NotAfter: now.Add(60 * time.Second), + PublicKey: pubCrt, + IsCA: false, + Issuer: "ca", + }, + } + peerCert.Sign(privCA) + + cs := &CertState{ + rawCertificate: []byte{}, + privateKey: []byte{}, + certificate: &cert.NebulaCertificate{}, + rawCertificateNoKey: []byte{}, + } + + lh := NewLightHouse(l, false, &net.IPNet{IP: net.IP{0, 0, 0, 0}, Mask: net.IPMask{0, 0, 0, 0}}, []uint32{}, 1000, 0, &udpConn{}, false, 1, false) + ifce := &Interface{ + hostMap: hostMap, + inside: &Tun{}, + outside: &udpConn{}, + certState: cs, + firewall: &Firewall{}, + lightHouse: lh, + handshakeManager: NewHandshakeManager(l, vpncidr, preferredRanges, hostMap, lh, &udpConn{}, defaultHandshakeConfig), + l: l, + disconnectInvalid: true, + caPool: ncp, + } + + // Create manager + nc := newConnectionManager(l, ifce, 5, 10) + hostinfo := nc.hostMap.AddVpnIP(vpnIP) + hostinfo.ConnectionState = &ConnectionState{ + certState: cs, + peerCert: &peerCert, + H: &noise.HandshakeState{}, + } + + // Move ahead 45s. + // Check if to disconnect with invalid certificate. + // Should be alive. + next_tick := now.Add(45 * time.Second) + disconnect, _ := nc.checkToDisconnect(next_tick, hostinfo) + assert.False(t, disconnect) + + // Move ahead 61s. + // Check if to disconnect with invalid certificate. + // Should be disconnected. + next_tick = now.Add(61 * time.Second) + disconnect, _ = nc.checkToDisconnect(next_tick, hostinfo) + assert.True(t, disconnect) +} diff --git a/examples/config.yml b/examples/config.yml index 7d4cf2373..5c50b789a 100644 --- a/examples/config.yml +++ b/examples/config.yml @@ -10,6 +10,8 @@ pki: #blocklist is a list of certificate fingerprints that we will refuse to talk to #blocklist: # - c99d4e650533b92061b09918e838a5a0a6aaee21eed1d12fd937682865936c72 + #disconnect_invalid is a toggle to force a client to be disconnected if the certificate is expired or invalid. + # disconnect_invalid: false # The static host map defines a set of hosts with fixed IP addresses on the internet (or any network). # A host can have multiple fixed IP addresses defined here, and nebula will try each when establishing a tunnel. diff --git a/interface.go b/interface.go index 108ca05b6..9ea6c3b2b 100644 --- a/interface.go +++ b/interface.go @@ -43,6 +43,7 @@ type InterfaceConfig struct { MessageMetrics *MessageMetrics version string caPool *cert.NebulaCAPool + disconnectInvalid bool ConntrackCacheTimeout time.Duration l *logrus.Logger @@ -67,6 +68,7 @@ type Interface struct { udpBatchSize int routines int caPool *cert.NebulaCAPool + disconnectInvalid bool // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse rebindCount int8 @@ -118,6 +120,7 @@ func NewInterface(c *InterfaceConfig) (*Interface, error) { writers: make([]*udpConn, c.routines), readers: make([]io.ReadWriteCloser, c.routines), caPool: c.caPool, + disconnectInvalid: c.disconnectInvalid, myVpnIp: ip2int(c.certState.certificate.Details.Ips[0].IP), conntrackCacheTimeout: c.ConntrackCacheTimeout, diff --git a/main.go b/main.go index a77559926..74f06b155 100644 --- a/main.go +++ b/main.go @@ -371,6 +371,7 @@ func Main(config *Config, configTest bool, buildVersion string, logger *logrus.L MessageMetrics: messageMetrics, version: buildVersion, caPool: caPool, + disconnectInvalid: config.GetBool("pki.disconnect_invalid", false), ConntrackCacheTimeout: conntrackCacheTimeout, l: l, From 040525fb543954419e011268a2b213830bd1436b Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Tue, 19 Oct 2021 21:33:59 -0500 Subject: [PATCH 2/2] Log the reason for tearing down an invalid cert --- connection_manager.go | 65 +++++++++++++++++++------------------- connection_manager_test.go | 13 ++++---- examples/config.yml | 6 ++-- 3 files changed, 43 insertions(+), 41 deletions(-) diff --git a/connection_manager.go b/connection_manager.go index 11892c542..78b1a8a51 100644 --- a/connection_manager.go +++ b/connection_manager.go @@ -5,7 +5,6 @@ import ( "time" "github.com/sirupsen/logrus" - "github.com/slackhq/nebula/cert" ) // TODO: incount and outcount are intended as a shortcut to locking the mutexes for every single packet @@ -154,22 +153,6 @@ func (n *connectionManager) Run() { } } -// Check if peer's certificate is not expired or invalid. -func (n *connectionManager) checkToDisconnect(now time.Time, hostinfo *HostInfo) (bool, *cert.NebulaCertificate) { - if !n.intf.disconnectInvalid { - return false, nil - } - - if remoteCert := hostinfo.GetCert(); remoteCert != nil { - valid, _ := remoteCert.Verify(now, n.intf.caPool) - if !valid { - return true, remoteCert - } - } - - return false, nil -} - func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte) { n.TrafficTimer.advance(now) for { @@ -194,14 +177,7 @@ func (n *connectionManager) HandleMonitorTick(now time.Time, p, nb, out []byte) } } - // If we set `pki.disconnect_invalid` and peer's cert is invalid, - // expired, etc. - disconnect immediately. - disconnect, remoteCert := n.checkToDisconnect(now, hostinfo) - if disconnect { - n.l.WithField("vpnIp", IntIp(vpnIP)). - WithField("certName", remoteCert.Details.Name). - Info("Invalid certificate status") - n.intf.closeTunnel(hostinfo, false) + if n.handleInvalidCertificate(now, vpnIP, hostinfo) { continue } @@ -255,13 +231,7 @@ func (n *connectionManager) HandleDeletionTick(now time.Time) { } } - // If we set `pki.disconnect_invalid` and peer's cert is invalid, - // expired, etc. - disconnect immediately. - disconnect, _ := n.checkToDisconnect(now, hostinfo) - if disconnect { - n.l.WithField("vpnIp", IntIp(vpnIP)). - Info("Invalid certificate status") - n.intf.closeTunnel(hostinfo, false) + if n.handleInvalidCertificate(now, vpnIP, hostinfo) { continue } @@ -302,3 +272,34 @@ func (n *connectionManager) HandleDeletionTick(now time.Time) { } } } + +// handleInvalidCertificates will destroy a tunnel if pki.disconnect_invalid is true and the certificate is no longer valid +func (n *connectionManager) handleInvalidCertificate(now time.Time, vpnIP uint32, hostinfo *HostInfo) bool { + if !n.intf.disconnectInvalid { + return false + } + + remoteCert := hostinfo.GetCert() + if remoteCert == nil { + return false + } + + valid, err := remoteCert.Verify(now, n.intf.caPool) + if valid { + return false + } + + fingerprint, _ := remoteCert.Sha256Sum() + n.l.WithField("vpnIp", IntIp(vpnIP)).WithError(err). + WithField("certName", remoteCert.Details.Name). + WithField("fingerprint", fingerprint). + Info("Remote certificate is no longer valid, tearing down the tunnel") + + // Inform the remote and close the tunnel locally + n.intf.sendCloseTunnel(hostinfo) + n.intf.closeTunnel(hostinfo, false) + + n.ClearIP(vpnIP) + n.ClearPendingDeletion(vpnIP) + return true +} diff --git a/connection_manager_test.go b/connection_manager_test.go index 05b706abd..d3b2b4901 100644 --- a/connection_manager_test.go +++ b/connection_manager_test.go @@ -221,6 +221,7 @@ func Test_NewConnectionManagerTest_DisconnectInvalid(t *testing.T) { // Create manager nc := newConnectionManager(l, ifce, 5, 10) + ifce.connectionManager = nc hostinfo := nc.hostMap.AddVpnIP(vpnIP) hostinfo.ConnectionState = &ConnectionState{ certState: cs, @@ -231,14 +232,14 @@ func Test_NewConnectionManagerTest_DisconnectInvalid(t *testing.T) { // Move ahead 45s. // Check if to disconnect with invalid certificate. // Should be alive. - next_tick := now.Add(45 * time.Second) - disconnect, _ := nc.checkToDisconnect(next_tick, hostinfo) - assert.False(t, disconnect) + nextTick := now.Add(45 * time.Second) + destroyed := nc.handleInvalidCertificate(nextTick, vpnIP, hostinfo) + assert.False(t, destroyed) // Move ahead 61s. // Check if to disconnect with invalid certificate. // Should be disconnected. - next_tick = now.Add(61 * time.Second) - disconnect, _ = nc.checkToDisconnect(next_tick, hostinfo) - assert.True(t, disconnect) + nextTick = now.Add(61 * time.Second) + destroyed = nc.handleInvalidCertificate(nextTick, vpnIP, hostinfo) + assert.True(t, destroyed) } diff --git a/examples/config.yml b/examples/config.yml index 5c50b789a..a550c0c49 100644 --- a/examples/config.yml +++ b/examples/config.yml @@ -7,11 +7,11 @@ pki: ca: /etc/nebula/ca.crt cert: /etc/nebula/host.crt key: /etc/nebula/host.key - #blocklist is a list of certificate fingerprints that we will refuse to talk to + # blocklist is a list of certificate fingerprints that we will refuse to talk to #blocklist: # - c99d4e650533b92061b09918e838a5a0a6aaee21eed1d12fd937682865936c72 - #disconnect_invalid is a toggle to force a client to be disconnected if the certificate is expired or invalid. - # disconnect_invalid: false + # disconnect_invalid is a toggle to force a client to be disconnected if the certificate is expired or invalid. + #disconnect_invalid: false # The static host map defines a set of hosts with fixed IP addresses on the internet (or any network). # A host can have multiple fixed IP addresses defined here, and nebula will try each when establishing a tunnel.