Skip to content

Commit

Permalink
agent: remove agent cache dependency from service mesh leaf certifica…
Browse files Browse the repository at this point in the history
…te management

This extracts the leaf cert management from within the agent cache.

This code was produced by the following process:

1. All tests in agent/cache, agent/cache-types, agent/auto-config,
   agent/consul/servercert were run at each stage.

    - The tests in agent matching .*Leaf were run at each stage.

    - The tests in agent/leafcert were run at each stage after they
      existed.

2. The former leaf cert Fetch implementation was extracted into a new
   package behind a "fake RPC" endpoint to make it look almost like all
   other cache type internals.

3. The old cache type was shimmed to use the fake RPC endpoint and
   generally cleaned up.

4. I selectively duplicated all of Get/Notify/NotifyCallback/Prepopulate
   from the agent/cache.Cache implementation over into the new package.
   This was renamed as leafcert.Manager.

    - Code that was irrelevant to the leaf cert type was deleted
      (inlining blocking=true, refresh=false)

5. Everything that used the leaf cert cache type (including proxycfg
   stuff) was shifted to use the leafcert.Manager instead.

6. agent/cache-types tests were moved and gently replumbed to execute
   as-is against a leafcert.Manager.

7. Inspired by some of the locking changes from derek's branch I split
   the fat lock into N+1 locks.

8. The waiter chan struct{} was eventually replaced with a
   singleflight.Group around cache updates, which was likely the biggest
   net structural change.

9. The awkward two layers or logic produced as a byproduct of marrying
   the agent cache management code with the leaf cert type code was
   slowly coalesced and flattened to remove confusion.

10. The .*Leaf tests from the agent package were copied and made to work
    directly against a leafcert.Manager to increase direct coverage.

I have done a best effort attempt to port the previous leaf-cert cache
type's tests over in spirit, as well as to take the e2e-ish tests in the
agent package with Leaf in the test name and copy those into the
agent/leafcert package to get more direct coverage, rather than coverage
tangled up in the agent logic.

There is no net-new test coverage, just coverage that was pushed around
from elsewhere.
  • Loading branch information
rboyer committed Jun 9, 2023
1 parent 3cb7056 commit db251e0
Show file tree
Hide file tree
Showing 42 changed files with 3,561 additions and 2,133 deletions.
3 changes: 3 additions & 0 deletions .changelog/17075.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
agent: remove agent cache dependency from service mesh leaf certificate management
```
26 changes: 17 additions & 9 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
grpcDNS "github.com/hashicorp/consul/agent/grpc-external/services/dns"
middleware "github.com/hashicorp/consul/agent/grpc-middleware"
"github.com/hashicorp/consul/agent/hcp/scada"
"github.com/hashicorp/consul/agent/leafcert"
"github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/consul/agent/proxycfg"
proxycfgglue "github.com/hashicorp/consul/agent/proxycfg-glue"
Expand Down Expand Up @@ -123,6 +124,7 @@ var configSourceToName = map[configSource]string{
ConfigSourceLocal: "local",
ConfigSourceRemote: "remote",
}

var configSourceFromName = map[string]configSource{
"local": ConfigSourceLocal,
"remote": ConfigSourceRemote,
Expand Down Expand Up @@ -247,6 +249,9 @@ type Agent struct {
// cache is the in-memory cache for data the Agent requests.
cache *cache.Cache

// leafCertManager issues and caches leaf certs as needed.
leafCertManager *leafcert.Manager

// checkReapAfter maps the check ID to a timeout after which we should
// reap its associated service
checkReapAfter map[structs.CheckID]time.Duration
Expand Down Expand Up @@ -428,6 +433,12 @@ type Agent struct {
// - create the AutoConfig object for future use in fully
// resolving the configuration
func New(bd BaseDeps) (*Agent, error) {
if bd.LeafCertManager == nil {
return nil, errors.New("LeafCertManager is required")
}
if bd.NetRPC == nil {
return nil, errors.New("NetRPC is required")
}
a := Agent{
checkReapAfter: make(map[structs.CheckID]time.Duration),
checkMonitors: make(map[structs.CheckID]*checks.CheckMonitor),
Expand All @@ -454,6 +465,7 @@ func New(bd BaseDeps) (*Agent, error) {
tlsConfigurator: bd.TLSConfigurator,
config: bd.RuntimeConfig,
cache: bd.Cache,
leafCertManager: bd.LeafCertManager,
routineManager: routine.NewManager(bd.Logger),
scadaProvider: bd.HCP.Provider,
}
Expand Down Expand Up @@ -497,6 +509,9 @@ func New(bd BaseDeps) (*Agent, error) {
},
}

// TODO(rb): remove this once NetRPC is properly available in BaseDeps without an Agent
bd.NetRPC.SetNetRPC(&a)

// We used to do this in the Start method. However it doesn't need to go
// there any longer. Originally it did because we passed the agent
// delegate to some of the cache registrations. Now we just
Expand Down Expand Up @@ -674,7 +689,7 @@ func (a *Agent) Start(ctx context.Context) error {
Datacenter: a.config.Datacenter,
ACLsEnabled: a.config.ACLsEnabled,
},
Cache: a.cache,
LeafCertManager: a.leafCertManager,
GetStore: func() servercert.Store { return server.FSM().State() },
TLSConfigurator: a.tlsConfigurator,
}
Expand Down Expand Up @@ -4354,13 +4369,6 @@ func (a *Agent) registerCache() {

a.cache.RegisterType(cachetype.ConnectCARootName, &cachetype.ConnectCARoot{RPC: a})

a.cache.RegisterType(cachetype.ConnectCALeafName, &cachetype.ConnectCALeaf{
RPC: a,
Cache: a.cache,
Datacenter: a.config.Datacenter,
TestOverrideCAChangeInitialDelay: a.config.ConnectTestCALeafRootChangeSpread,
})

a.cache.RegisterType(cachetype.IntentionMatchName, &cachetype.IntentionMatch{RPC: a})

a.cache.RegisterType(cachetype.IntentionUpstreamsName, &cachetype.IntentionUpstreams{RPC: a})
Expand Down Expand Up @@ -4521,7 +4529,7 @@ func (a *Agent) proxyDataSources() proxycfg.DataSources {
IntentionUpstreams: proxycfgglue.CacheIntentionUpstreams(a.cache),
IntentionUpstreamsDestination: proxycfgglue.CacheIntentionUpstreamsDestination(a.cache),
InternalServiceDump: proxycfgglue.CacheInternalServiceDump(a.cache),
LeafCertificate: proxycfgglue.CacheLeafCertificate(a.cache),
LeafCertificate: proxycfgglue.LocalLeafCerts(a.leafCertManager),
PeeredUpstreams: proxycfgglue.CachePeeredUpstreams(a.cache),
PeeringList: proxycfgglue.CachePeeringList(a.cache),
PreparedQuery: proxycfgglue.CachePrepraredQuery(a.cache),
Expand Down
21 changes: 8 additions & 13 deletions agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,30 @@ import (
"strings"
"time"

"github.com/hashicorp/go-bexpr"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/mitchellh/hashstructure"

"github.com/hashicorp/consul/envoyextensions/xdscommon"
"github.com/hashicorp/consul/version"

"github.com/hashicorp/go-bexpr"
"github.com/hashicorp/serf/coordinate"
"github.com/hashicorp/serf/serf"
"github.com/mitchellh/hashstructure"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/hashicorp/consul/acl"
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/debug"
"github.com/hashicorp/consul/agent/leafcert"
"github.com/hashicorp/consul/agent/structs"
token_store "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/envoyextensions/xdscommon"
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/logging/monitor"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/consul/version"
)

type Self struct {
Expand Down Expand Up @@ -1571,7 +1570,7 @@ func (s *HTTPHandlers) AgentConnectCALeafCert(resp http.ResponseWriter, req *htt

// TODO(peering): expose way to get kind=mesh-gateway type cert with appropriate ACLs

args := cachetype.ConnectCALeafRequest{
args := leafcert.ConnectCALeafRequest{
Service: serviceName, // Need name not ID
}
var qOpts structs.QueryOptions
Expand Down Expand Up @@ -1600,17 +1599,13 @@ func (s *HTTPHandlers) AgentConnectCALeafCert(resp http.ResponseWriter, req *htt
return nil, nil
}

raw, m, err := s.agent.cache.Get(req.Context(), cachetype.ConnectCALeafName, &args)
reply, m, err := s.agent.leafCertManager.Get(req.Context(), &args)
if err != nil {
return nil, err
}

defer setCacheMeta(resp, &m)

reply, ok := raw.(*structs.IssuedCert)
if !ok {
// This should never happen, but we want to protect against panics
return nil, fmt.Errorf("internal error: response type not correct")
}
setIndex(resp, reply.ModifyIndex)

return reply, nil
Expand Down
25 changes: 18 additions & 7 deletions agent/agent_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ import (
"time"

"github.com/armon/go-metrics"

"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/version"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-uuid"
"github.com/hashicorp/serf/serf"
Expand All @@ -44,12 +40,14 @@ import (
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
tokenStore "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/envoyextensions/xdscommon"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/hashicorp/consul/testrpc"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/consul/version"
)

func createACLTokenWithAgentReadPolicy(t *testing.T, srv *HTTPHandlers) string {
Expand Down Expand Up @@ -6914,14 +6912,27 @@ func TestAgentConnectCALeafCert_good(t *testing.T) {
require.Equal(t, issued, issued2)
}

replyCh := make(chan *httptest.ResponseRecorder, 1)

go func(index string) {
resp := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil)
a.srv.h.ServeHTTP(resp, req)

replyCh <- resp
}(index)

// Set a new CA
ca2 := connect.TestCAConfigSet(t, a, nil)

// Issue a blocking query to ensure that the cert gets updated appropriately
t.Run("test blocking queries update leaf cert", func(t *testing.T) {
resp := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil)
a.srv.h.ServeHTTP(resp, req)
var resp *httptest.ResponseRecorder
select {
case resp = <-replyCh:
case <-time.After(500 * time.Millisecond):
t.Fatal("blocking query did not wake up during rotation")
}
dec := json.NewDecoder(resp.Body)
issued2 := &structs.IssuedCert{}
require.NoError(t, dec.Decode(issued2))
Expand Down
35 changes: 32 additions & 3 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import (
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/hcp"
"github.com/hashicorp/consul/agent/hcp/scada"
"github.com/hashicorp/consul/agent/leafcert"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
Expand Down Expand Up @@ -328,9 +329,16 @@ func TestAgent_HTTPMaxHeaderBytes(t *testing.T) {
},
HTTPMaxHeaderBytes: tt.maxHeaderBytes,
},
Cache: cache.New(cache.Options{}),
Cache: cache.New(cache.Options{}),
NetRPC: &LazyNetRPC{},
}

bd.LeafCertManager = leafcert.NewManager(leafcert.Deps{
CertSigner: leafcert.NewNetRPCCertSigner(bd.NetRPC),
RootsReader: leafcert.NewCachedRootsReader(bd.Cache, "dc1"),
Config: leafcert.Config{},
})

cfg := config.RuntimeConfig{BuildDate: time.Date(2000, 1, 1, 0, 0, 1, 0, time.UTC)}
bd, err = initEnterpriseBaseDeps(bd, &cfg)
require.NoError(t, err)
Expand Down Expand Up @@ -5443,9 +5451,16 @@ func TestAgent_ListenHTTP_MultipleAddresses(t *testing.T) {
&net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: ports[1]},
},
},
Cache: cache.New(cache.Options{}),
Cache: cache.New(cache.Options{}),
NetRPC: &LazyNetRPC{},
}

bd.LeafCertManager = leafcert.NewManager(leafcert.Deps{
CertSigner: leafcert.NewNetRPCCertSigner(bd.NetRPC),
RootsReader: leafcert.NewCachedRootsReader(bd.Cache, "dc1"),
Config: leafcert.Config{},
})

cfg := config.RuntimeConfig{BuildDate: time.Date(2000, 1, 1, 0, 0, 1, 0, time.UTC)}
bd, err = initEnterpriseBaseDeps(bd, &cfg)
require.NoError(t, err)
Expand Down Expand Up @@ -6029,9 +6044,16 @@ func TestAgent_startListeners(t *testing.T) {
RuntimeConfig: &config.RuntimeConfig{
HTTPAddrs: []net.Addr{},
},
Cache: cache.New(cache.Options{}),
Cache: cache.New(cache.Options{}),
NetRPC: &LazyNetRPC{},
}

bd.LeafCertManager = leafcert.NewManager(leafcert.Deps{
CertSigner: leafcert.NewNetRPCCertSigner(bd.NetRPC),
RootsReader: leafcert.NewCachedRootsReader(bd.Cache, "dc1"),
Config: leafcert.Config{},
})

bd, err := initEnterpriseBaseDeps(bd, &config.RuntimeConfig{})
require.NoError(t, err)

Expand Down Expand Up @@ -6161,8 +6183,15 @@ func TestAgent_startListeners_scada(t *testing.T) {
},
RuntimeConfig: &config.RuntimeConfig{},
Cache: cache.New(cache.Options{}),
NetRPC: &LazyNetRPC{},
}

bd.LeafCertManager = leafcert.NewManager(leafcert.Deps{
CertSigner: leafcert.NewNetRPCCertSigner(bd.NetRPC),
RootsReader: leafcert.NewCachedRootsReader(bd.Cache, "dc1"),
Config: leafcert.Config{},
})

cfg := config.RuntimeConfig{BuildDate: time.Date(2000, 1, 1, 0, 0, 1, 0, time.UTC)}
bd, err := initEnterpriseBaseDeps(bd, &cfg)
require.NoError(t, err)
Expand Down
22 changes: 10 additions & 12 deletions agent/auto-config/auto_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/config"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/leafcert"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
Expand Down Expand Up @@ -566,9 +567,8 @@ func TestGoRoutineManagement(t *testing.T) {
})

leafReq := ac.leafCertRequest()
mcfg.cache.On("Notify",
mcfg.leafCerts.On("Notify",
mock.Anything,
cachetype.ConnectCALeafName,
&leafReq,
leafWatchID,
mock.Anything,
Expand Down Expand Up @@ -717,10 +717,9 @@ func startedAutoConfig(t *testing.T, autoEncrypt bool) testAutoConfig {
mock.Anything,
).Return(nil).Once()

mcfg.cache.On("Notify",
mcfg.leafCerts.On("Notify",
mock.Anything,
cachetype.ConnectCALeafName,
&cachetype.ConnectCALeafRequest{
&leafcert.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: originalToken,
Expand Down Expand Up @@ -875,10 +874,9 @@ func TestTokenUpdate(t *testing.T) {
})

leafCtx, leafCancel := context.WithCancel(context.Background())
testAC.mcfg.cache.On("Notify",
testAC.mcfg.leafCerts.On("Notify",
mock.Anything,
cachetype.ConnectCALeafName,
&cachetype.ConnectCALeafRequest{
&leafcert.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: newToken,
Expand Down Expand Up @@ -975,14 +973,14 @@ func TestCertUpdate(t *testing.T) {
NotAfter: secondCert.ValidBefore,
}).Once()

req := cachetype.ConnectCALeafRequest{
req := leafcert.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: testAC.originalToken,
DNSSAN: defaultDNSSANs,
IPSAN: defaultIPSANs,
}
require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
require.True(t, testAC.mcfg.leafCerts.sendNotification(context.Background(), req.Key(), cache.UpdateEvent{
CorrelationID: leafWatchID,
Result: secondCert,
Meta: cache.ResultMeta{
Expand Down Expand Up @@ -1102,14 +1100,14 @@ func TestFallback(t *testing.T) {

// now that all the mocks are set up we can trigger the whole thing by sending the second expired cert
// as a cache update event.
req := cachetype.ConnectCALeafRequest{
req := leafcert.ConnectCALeafRequest{
Datacenter: "dc1",
Agent: "autoconf",
Token: testAC.originalToken,
DNSSAN: defaultDNSSANs,
IPSAN: defaultIPSANs,
}
require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
require.True(t, testAC.mcfg.leafCerts.sendNotification(context.Background(), req.Key(), cache.UpdateEvent{
CorrelationID: leafWatchID,
Result: secondCert,
Meta: cache.ResultMeta{
Expand Down
Loading

0 comments on commit db251e0

Please sign in to comment.