Change partition for peers in discovery chain targets #16770

Merged
3 changes: 3 additions & 0 deletions .changelog/_4832.txt
@@ -0,0 +1,3 @@
```release-note:bug
peering: **(Consul Enterprise only)** Fix issue where resolvers, routers, and splitters referencing peer targets may not work correctly for non-default partitions and namespaces. Enterprise customers leveraging peering are encouraged to upgrade both servers and agents to avoid this problem.
```
17 changes: 14 additions & 3 deletions agent/consul/discoverychain/compile.go
Original file line number Diff line number Diff line change
@@ -719,10 +719,21 @@ func (c *compiler) newTarget(opts structs.DiscoveryTargetOpts) *structs.DiscoveryTarget {
} else {
// Don't allow Peer and Datacenter.
opts.Datacenter = ""
- // Peer and Partition cannot both be set.
- opts.Partition = acl.PartitionOrDefault("")
+ // Since discovery targets (for peering) are ONLY used to query the catalog, and
+ // not to generate the SNI, it is more correct to switch this to the partition of
+ // the calling side of the peering, as that matches where the replicated data is
+ // stored in the catalog. This is done to simplify the usage of peer targets in
+ // both the xds and proxycfg packages.
+ //
+ // The peer info data attached to service instances will have the embedded opaque
+ // SNI/SAN information generated by the remote side, and that will have the
+ // OTHER partition properly specified.
+ opts.Partition = acl.PartitionOrDefault(c.evaluateInPartition)
- // Default to "default" rather than c.evaluateInNamespace.
- opts.Namespace = acl.PartitionOrDefault(opts.Namespace)
+ // Note that the namespace is not swapped out, because it should
+ // always match the value in the remote cluster (and shouldn't
+ // have been changed anywhere).
+ opts.Namespace = acl.NamespaceOrDefault(opts.Namespace)
}

t := structs.NewDiscoveryTarget(opts)
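The net effect of the hunk above: for peer targets the compiler clears the datacenter, pins the partition to the calling (importing) side, and only defaults the namespace. A minimal sketch of that rule, assuming the `acl` helpers and `structs.DiscoveryTargetOpts` fields shown in the diff; `normalizePeerTargetOpts` is a hypothetical name, not a function added by this PR:

```go
package discoverychain

import (
	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/structs"
)

// normalizePeerTargetOpts (hypothetical helper) applies the peer-target rule
// from the hunk above to a set of discovery target options.
func normalizePeerTargetOpts(opts *structs.DiscoveryTargetOpts, evaluateInPartition string) {
	if opts.Peer == "" {
		return
	}
	// Peer and Datacenter are mutually exclusive.
	opts.Datacenter = ""
	// Replicated (imported) catalog data lives in the importing side's
	// partition, so the target queries with the calling side's partition.
	opts.Partition = acl.PartitionOrDefault(evaluateInPartition)
	// The namespace already matches the remote cluster's value, so it is
	// only defaulted, never swapped.
	opts.Namespace = acl.NamespaceOrDefault(opts.Namespace)
}
```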
2 changes: 1 addition & 1 deletion agent/proxycfg/testing_api_gateway.go
@@ -108,7 +108,7 @@ func TestConfigSnapshotAPIGateway(
upstreams := structs.TestUpstreams(t, false)

baseEvents = testSpliceEvents(baseEvents, setupTestVariationConfigEntriesAndSnapshot(
- t, variation, upstreams, additionalEntries...,
+ t, variation, false, upstreams, additionalEntries...,
))

return testConfigSnapshotFixture(t, &structs.NodeService{
2 changes: 1 addition & 1 deletion agent/proxycfg/testing_connect_proxy.go
@@ -145,7 +145,7 @@ func TestConfigSnapshotDiscoveryChain(
},
},
}, setupTestVariationConfigEntriesAndSnapshot(
- t, variation, upstreams, additionalEntries...,
+ t, variation, enterprise, upstreams, additionalEntries...,
))

return testConfigSnapshotFixture(t, &structs.NodeService{
2 changes: 1 addition & 1 deletion agent/proxycfg/testing_ingress_gateway.go
@@ -88,7 +88,7 @@ func TestConfigSnapshotIngressGateway(
upstreams = structs.Upstreams{upstreams[0]} // just keep 'db'

baseEvents = testSpliceEvents(baseEvents, setupTestVariationConfigEntriesAndSnapshot(
- t, variation, upstreams, additionalEntries...,
+ t, variation, false, upstreams, additionalEntries...,
))
}

11 changes: 7 additions & 4 deletions agent/proxycfg/testing_mesh_gateway.go
@@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/discoverychain"
"github.com/hashicorp/consul/agent/structs"
@@ -474,6 +475,8 @@ func TestConfigSnapshotPeeredMeshGateway(t testing.T, variant string, nsFn func(
discoChains = make(map[structs.ServiceName]*structs.CompiledDiscoveryChain)
endpoints = make(map[structs.ServiceName]structs.CheckServiceNodes)
entries []structs.ConfigEntry
+ // This portion of the test is not currently enterprise-aware, but we need this to satisfy a function call.
+ entMeta = *acl.DefaultEnterpriseMeta()
)

switch variant {
@@ -660,17 +663,17 @@ func TestConfigSnapshotPeeredMeshGateway(t testing.T, variant string, nsFn func(
CorrelationID: "peering-connect-service:peer-a:db",
Result: &structs.IndexedCheckServiceNodes{
Nodes: structs.CheckServiceNodes{
structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc1", "peer-a", "10.40.1.1", false),
structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc1", "peer-a", "10.40.1.2", false),
structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc1", "peer-a", "10.40.1.1", false, entMeta),
structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc1", "peer-a", "10.40.1.2", false, entMeta),
},
},
},
UpdateEvent{
CorrelationID: "peering-connect-service:peer-b:alt",
Result: &structs.IndexedCheckServiceNodes{
Nodes: structs.CheckServiceNodes{
structs.TestCheckNodeServiceWithNameInPeer(t, "alt", "remote-dc", "peer-b", "10.40.2.1", false),
structs.TestCheckNodeServiceWithNameInPeer(t, "alt", "remote-dc", "peer-b", "10.40.2.2", true),
structs.TestCheckNodeServiceWithNameInPeer(t, "alt", "remote-dc", "peer-b", "10.40.2.1", false, entMeta),
structs.TestCheckNodeServiceWithNameInPeer(t, "alt", "remote-dc", "peer-b", "10.40.2.2", true, entMeta),
},
},
},
47 changes: 32 additions & 15 deletions agent/proxycfg/testing_upstreams.go
@@ -15,6 +15,7 @@ import (
func setupTestVariationConfigEntriesAndSnapshot(
t testing.T,
variation string,
+ enterprise bool,
upstreams structs.Upstreams,
additionalEntries ...structs.ConfigEntry,
) []UpdateEvent {
@@ -24,7 +25,7 @@
dbUID = NewUpstreamID(&dbUpstream)
)

- dbChain := setupTestVariationDiscoveryChain(t, variation, dbUID.EnterpriseMeta, additionalEntries...)
+ dbChain := setupTestVariationDiscoveryChain(t, variation, enterprise, dbUID.EnterpriseMeta, additionalEntries...)

nodes := TestUpstreamNodes(t, "db")
if variation == "register-to-terminating-gateway" {
@@ -106,14 +107,16 @@ func setupTestVariationConfigEntriesAndSnapshot(
},
})
uid := UpstreamID{
- Name:           "db",
- Peer:           "cluster-01",
- EnterpriseMeta: acl.NewEnterpriseMetaWithPartition(dbUID.PartitionOrDefault(), ""),
+ Name: "db",
+ Peer: "cluster-01",
}
+ if enterprise {
+ uid.EnterpriseMeta = acl.NewEnterpriseMetaWithPartition(dbUID.PartitionOrDefault(), "ns9")
+ }
events = append(events, UpdateEvent{
CorrelationID: "upstream-peer:" + uid.String(),
Result: &structs.IndexedCheckServiceNodes{
- Nodes: structs.CheckServiceNodes{structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc1", "cluster-01", "10.40.1.1", false)},
+ Nodes: structs.CheckServiceNodes{structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc2", "cluster-01", "10.40.1.1", false, uid.EnterpriseMeta)},
},
})
case "redirect-to-cluster-peer":
@@ -129,14 +132,16 @@
},
})
uid := UpstreamID{
- Name:           "db",
- Peer:           "cluster-01",
- EnterpriseMeta: acl.NewEnterpriseMetaWithPartition(dbUID.PartitionOrDefault(), ""),
+ Name: "db",
+ Peer: "cluster-01",
}
+ if enterprise {
+ uid.EnterpriseMeta = acl.NewEnterpriseMetaWithPartition(dbUID.PartitionOrDefault(), "ns9")
+ }
events = append(events, UpdateEvent{
CorrelationID: "upstream-peer:" + uid.String(),
Result: &structs.IndexedCheckServiceNodes{
- Nodes: structs.CheckServiceNodes{structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc2", "cluster-01", "10.40.1.1", false)},
+ Nodes: structs.CheckServiceNodes{structs.TestCheckNodeServiceWithNameInPeer(t, "db", "dc2", "cluster-01", "10.40.1.1", false, uid.EnterpriseMeta)},
},
})
case "failover-through-double-remote-gateway-triggered":
@@ -256,6 +261,7 @@ func setupTestVariationConfigEntriesAndSnapshot(
func setupTestVariationDiscoveryChain(
t testing.T,
variation string,
+ enterprise bool,
entMeta acl.EnterpriseMeta,
additionalEntries ...structs.ConfigEntry,
) *structs.CompiledDiscoveryChain {
@@ -343,6 +349,14 @@ func setupTestVariationDiscoveryChain(
},
)
case "failover-to-cluster-peer":
+ target := structs.ServiceResolverFailoverTarget{
+ Peer: "cluster-01",
+ }
+
+ if enterprise {
+ target.Namespace = "ns9"
+ }
+
entries = append(entries,
&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
@@ -352,24 +366,27 @@
RequestTimeout: 33 * time.Second,
Failover: map[string]structs.ServiceResolverFailover{
"*": {
- Targets: []structs.ServiceResolverFailoverTarget{
- {Peer: "cluster-01"},
- },
+ Targets: []structs.ServiceResolverFailoverTarget{target},
},
},
},
)
case "redirect-to-cluster-peer":
+ redirect := &structs.ServiceResolverRedirect{
+ Peer: "cluster-01",
+ }
+ if enterprise {
+ redirect.Namespace = "ns9"
+ }
+
entries = append(entries,
&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "db",
EnterpriseMeta: entMeta,
ConnectTimeout: 33 * time.Second,
RequestTimeout: 33 * time.Second,
- Redirect: &structs.ServiceResolverRedirect{
- Peer: "cluster-01",
- },
+ Redirect: redirect,
},
)
case "failover-through-double-remote-gateway-triggered":
24 changes: 2 additions & 22 deletions agent/proxycfg/upstreams.go
@@ -314,7 +314,7 @@ func (s *handlerUpstreams) resetWatchesFromChain(
}

opts := targetWatchOpts{upstreamID: uid}
- opts.fromChainTarget(chain, target)
+ opts.fromChainTarget(target)

err := s.watchUpstreamTarget(ctx, snap, opts)
if err != nil {
@@ -432,30 +432,13 @@ type targetWatchOpts struct {
entMeta *acl.EnterpriseMeta
}

- func (o *targetWatchOpts) fromChainTarget(c *structs.CompiledDiscoveryChain, t *structs.DiscoveryTarget) {
+ func (o *targetWatchOpts) fromChainTarget(t *structs.DiscoveryTarget) {
o.chainID = t.ID
o.service = t.Service
o.filter = t.Subset.Filter
o.datacenter = t.Datacenter
o.peer = t.Peer
o.entMeta = t.GetEnterpriseMetadata()
-
- // The peer-targets in a discovery chain intentionally clear out
- // the partition field, since we don't know the remote service's partition.
- // Therefore, we must query with the chain's local partition / DC, or else
- // the services will not be found.
- //
- // Note that the namespace is not swapped out, because it should
- // always match the value in the remote datacenter (and shouldn't
- // have been changed anywhere).
- if o.peer != "" {
- o.datacenter = ""
- // Clone the enterprise meta so it's not modified when we swap the partition.
- var em acl.EnterpriseMeta
- em.Merge(o.entMeta)
- em.OverridePartition(c.Partition)
- o.entMeta = &em
- }
}

func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *ConfigSnapshotUpstreams, opts targetWatchOpts) error {
@@ -470,9 +453,6 @@ func (s *handlerUpstreams) watchUpstreamTarget(ctx context.Context, snap *ConfigSnapshotUpstreams, opts targetWatchOpts) error {

if opts.peer != "" {
uid = NewUpstreamIDFromTargetID(opts.chainID)
- // chainID has the partition stripped. However, when a target is in a cluster peer, the partition should be set
- // to the local partition (i.e chain.Partition), since the peered target is imported into the local partition.
- uid.OverridePartition(opts.entMeta.PartitionOrDefault())
correlationID = upstreamPeerWatchIDPrefix + uid.String()
}

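With the compiler pinning the partition at chain-compile time, the override logic deleted above has no remaining job: the watch options become a verbatim copy of the target. A self-contained sketch of the simplified flow, using stand-in types rather than the real `proxycfg` ones:

```go
package main

import "fmt"

// Stand-ins for the proxycfg types, for illustration only.
type discoveryTarget struct {
	ID, Service, Datacenter, Peer, Partition string
}

type targetWatchOpts struct {
	chainID, service, datacenter, peer, partition string
}

// fromChainTarget is now a plain field copy: no chain argument and no
// partition clone/override, because the compiler already set the partition.
func (o *targetWatchOpts) fromChainTarget(t *discoveryTarget) {
	o.chainID = t.ID
	o.service = t.Service
	o.datacenter = t.Datacenter
	o.peer = t.Peer
	o.partition = t.Partition
}

func main() {
	// A peer target as the fixed compiler emits it: the partition is the
	// local (importing) side's "ap1", and the datacenter is cleared.
	t := &discoveryTarget{
		ID:        "db.ns9.ap1.external.cluster-01",
		Service:   "db",
		Peer:      "cluster-01",
		Partition: "ap1",
	}
	var opts targetWatchOpts
	opts.fromChainTarget(t)
	fmt.Printf("%+v\n", opts)
}
```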
2 changes: 1 addition & 1 deletion agent/structs/discovery_chain.go
@@ -315,7 +315,7 @@ func (t *DiscoveryTarget) ToDiscoveryTargetOpts() DiscoveryTargetOpts {
func ChainID(opts DiscoveryTargetOpts) string {
// NOTE: this format is similar to the SNI syntax for simplicity
if opts.Peer != "" {
return fmt.Sprintf("%s.%s.default.external.%s", opts.Service, opts.Namespace, opts.Peer)
return fmt.Sprintf("%s.%s.%s.external.%s", opts.Service, opts.Namespace, opts.Partition, opts.Peer)
}
if opts.ServiceSubset == "" {
return fmt.Sprintf("%s.%s.%s.%s", opts.Service, opts.Namespace, opts.Partition, opts.Datacenter)
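Because the peer form of the chain ID now carries the real partition rather than a hard-coded `default`, IDs derived from it (such as the upstream IDs in `proxycfg` and `xds`) no longer need a partition override. A small example test with illustrative values, assuming only the `DiscoveryTargetOpts` fields used in this diff:

```go
package structs_test

import (
	"fmt"

	"github.com/hashicorp/consul/agent/structs"
)

func ExampleChainID() {
	// Peer target in partition "ap1": the partition segment is no longer
	// hard-coded to "default" in the chain ID.
	fmt.Println(structs.ChainID(structs.DiscoveryTargetOpts{
		Service:   "db",
		Namespace: "ns9",
		Partition: "ap1",
		Peer:      "cluster-01",
	}))
	// Output: db.ns9.ap1.external.cluster-01
}
```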
19 changes: 14 additions & 5 deletions agent/structs/testing_catalog.go
@@ -1,6 +1,9 @@
package structs

import (
"fmt"

"github.com/hashicorp/consul/acl"
"github.com/mitchellh/go-testing-interface"
)

@@ -55,7 +58,14 @@ func TestNodeServiceWithName(t testing.T, name string) *NodeService {

const peerTrustDomain = "1c053652-8512-4373-90cf-5a7f6263a994.consul"

- func TestCheckNodeServiceWithNameInPeer(t testing.T, name, dc, peer, ip string, useHostname bool) CheckServiceNode {
+ func TestCheckNodeServiceWithNameInPeer(t testing.T, name, dc, peer, ip string, useHostname bool, remoteEntMeta acl.EnterpriseMeta) CheckServiceNode {
+
+ // Non-default partitions have a different spiffe format.
+ spiffe := fmt.Sprintf("spiffe://%s/ns/default/dc/%s/svc/%s", peerTrustDomain, dc, name)
+ if !remoteEntMeta.InDefaultPartition() {
+ spiffe = fmt.Sprintf("spiffe://%s/ap/%s/ns/%s/dc/%s/svc/%s",
+ peerTrustDomain, remoteEntMeta.PartitionOrDefault(), remoteEntMeta.NamespaceOrDefault(), dc, name)
+ }
service := &NodeService{
Kind: ServiceKindTypical,
Service: name,
@@ -66,11 +76,10 @@ func TestCheckNodeServiceWithNameInPeer(t testing.T, name, dc, peer, ip string, useHostname bool, remoteEntMeta acl.EnterpriseMeta) CheckServiceNode {
Connect: ServiceConnect{
PeerMeta: &PeeringServiceMeta{
SNI: []string{
name + ".default.default." + peer + ".external." + peerTrustDomain,
},
SpiffeID: []string{
"spiffe://" + peerTrustDomain + "/ns/default/dc/" + peer + "-dc/svc/" + name,
fmt.Sprintf("%s.%s.%s.%s.external.%s",
name, remoteEntMeta.NamespaceOrDefault(), remoteEntMeta.PartitionOrDefault(), peer, peerTrustDomain),
},
SpiffeID: []string{spiffe},
Protocol: "tcp",
},
},
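The updated helper emits one of two SPIFFE ID shapes, differing only in the partition and namespace segments. A quick runnable illustration of both formats; the trust domain is the test constant above, and `ap1`, `ns9`, `dc1`, and `db` are example values:

```go
package main

import "fmt"

func main() {
	const trustDomain = "1c053652-8512-4373-90cf-5a7f6263a994.consul"
	// Default partition: no /ap/ segment.
	fmt.Printf("spiffe://%s/ns/default/dc/%s/svc/%s\n", trustDomain, "dc1", "db")
	// Non-default partition: /ap/<partition>/ns/<namespace> precede the dc.
	fmt.Printf("spiffe://%s/ap/%s/ns/%s/dc/%s/svc/%s\n",
		trustDomain, "ap1", "ns9", "dc1", "db")
}
```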
4 changes: 0 additions & 4 deletions agent/xds/clusters.go
@@ -1308,10 +1308,6 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(

targetUID := proxycfg.NewUpstreamIDFromTargetID(targetData.targetID)
if targetUID.Peer != "" {
- // targetID already has a stripped partition, so targetUID will not have a partition either. However,
- // when a failover target is in a cluster peer, the partition should be set to the local partition (i.e
- // chain.Partition), since that's where the data is imported to.
- targetUID.OverridePartition(chain.Partition)
peerMeta, found := upstreamsSnapshot.UpstreamPeerMeta(targetUID)
if !found {
s.Logger.Warn("failed to fetch upstream peering metadata for cluster", "target", targetUID)