Skip to content

Commit

Permalink
Add peering .service and .node DNS lookups. (#15596)
Browse files Browse the repository at this point in the history
Add peering `.service` and `.node` DNS lookups.
  • Loading branch information
hashi-derek authored Nov 29, 2022
1 parent 7561303 commit 95dc0c7
Show file tree
Hide file tree
Showing 4 changed files with 257 additions and 37 deletions.
3 changes: 3 additions & 0 deletions .changelog/15596.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
dns: Add support for cluster peering `.service` and `.node` DNS queries.
```
128 changes: 91 additions & 37 deletions agent/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type dnsConfig struct {
}

type serviceLookup struct {
PeerName string
Datacenter string
Service string
Tag string
Expand All @@ -116,6 +117,7 @@ type serviceLookup struct {

type nodeLookup struct {
Datacenter string
PeerName string
Node string
Tag string
MaxRecursionLevel int
Expand Down Expand Up @@ -421,11 +423,18 @@ func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
// server side to avoid transferring the entire node list.
if err := d.agent.RPC("Catalog.ListNodes", &args, &out); err == nil {
for _, n := range out.Nodes {
lookup := serviceLookup{
// Peering PTR lookups are currently not supported, so we don't
// need to populate that field for creating the node FQDN.
// PeerName: n.PeerName,
Datacenter: n.Datacenter,
EnterpriseMeta: *n.GetEnterpriseMeta(),
}
arpa, _ := dns.ReverseAddr(n.Address)
if arpa == qName {
ptr := &dns.PTR{
Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: 0},
Ptr: fmt.Sprintf("%s.node.%s.%s", n.Node, datacenter, d.domain),
Ptr: nodeCanonicalDNSName(lookup, n.Node, d.domain),
}
m.Answer = append(m.Answer, ptr)
break
Expand Down Expand Up @@ -685,8 +694,16 @@ type queryLocality struct {
// Example query: <service>.virtual.<namespace>.ns.<partition>.ap.<datacenter>.dc.consul
datacenter string

// peerOrDatacenter is parsed from DNS queries where the datacenter and peer name are specified in the same query part.
// peer is the peer name parsed from a label that has explicit parts.
// Example query: <service>.virtual.<namespace>.ns.<peer>.peer.<partition>.ap.consul
peer string

// peerOrDatacenter is parsed from DNS queries where the datacenter and peer name are
// specified in the same query part.
// Example query: <service>.virtual.<peerOrDatacenter>.consul
//
// Note that this field should only be a "peer" for virtual queries, since virtual IPs should
// not be shared between datacenters. In all other cases, it should be considered a DC.
peerOrDatacenter string

acl.EnterpriseMeta
Expand Down Expand Up @@ -763,11 +780,17 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi

lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
PeerName: locality.peer,
Connect: false,
Ingress: false,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// Only one of dc or peer can be used.
if lookup.PeerName != "" {
lookup.Datacenter = ""
}

// Support RFC 2782 style syntax
if n == 2 && strings.HasPrefix(queryParts[1], "_") && strings.HasPrefix(queryParts[0], "_") {

Expand Down Expand Up @@ -808,6 +831,9 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi
return invalid()
}

// Peering is not currently supported for connect queries.
// Exposing this likely would not provide much value, since users would
// need to be very familiar with our TLS / SNI / mesh gateways to leverage it.
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
Service: queryParts[len(queryParts)-1],
Expand All @@ -833,13 +859,18 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi
// The datacenter of the request is not specified because cross-datacenter virtual IP
// queries are not supported. This guard rail is in place because virtual IPs are allocated
// within a DC, therefore their uniqueness is not guaranteed globally.
PeerName: locality.peerOrDatacenter,
PeerName: locality.peer,
ServiceName: queryParts[len(queryParts)-1],
EnterpriseMeta: locality.EnterpriseMeta,
QueryOptions: structs.QueryOptions{
Token: d.agent.tokens.UserToken(),
},
}
if args.PeerName == "" {
// If the peer name was not explicitly defined, fall back to the ambiguously-parsed version.
args.PeerName = locality.peerOrDatacenter
}

var out string
if err := d.agent.RPC("Catalog.VirtualIPForService", &args, &out); err != nil {
return err
Expand Down Expand Up @@ -868,6 +899,8 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi
return invalid()
}

// Peering is not currently supported for ingress queries.
// We probably should not be encouraging chained calls from ingress to peers anyway.
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
Service: queryParts[len(queryParts)-1],
Expand Down Expand Up @@ -900,10 +933,15 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi

lookup := nodeLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
PeerName: locality.peer,
Node: node,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// Only one of dc or peer can be used.
if lookup.PeerName != "" {
lookup.Datacenter = ""
}

return d.nodeLookup(cfg, lookup, req, resp)

Expand Down Expand Up @@ -1040,6 +1078,7 @@ func (d *DNSServer) nodeLookup(cfg *dnsConfig, lookup nodeLookup, req, resp *dns
// Make an RPC request
args := &structs.NodeSpecificRequest{
Datacenter: lookup.Datacenter,
PeerName: lookup.PeerName,
Node: lookup.Node,
QueryOptions: structs.QueryOptions{
Token: d.agent.tokens.UserToken(),
Expand Down Expand Up @@ -1366,6 +1405,7 @@ func (d *DNSServer) lookupServiceNodes(cfg *dnsConfig, lookup serviceLookup) (st
serviceTags = []string{lookup.Tag}
}
args := structs.ServiceSpecificRequest{
PeerName: lookup.PeerName,
Connect: lookup.Connect,
Ingress: lookup.Ingress,
Datacenter: lookup.Datacenter,
Expand Down Expand Up @@ -1416,9 +1456,9 @@ func (d *DNSServer) serviceLookup(cfg *dnsConfig, lookup serviceLookup, req, res
// Add various responses depending on the request
qType := req.Question[0].Qtype
if qType == dns.TypeSRV {
d.serviceSRVRecords(cfg, lookup.Datacenter, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
d.serviceSRVRecords(cfg, lookup, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
} else {
d.serviceNodeRecords(cfg, lookup.Datacenter, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
d.serviceNodeRecords(cfg, lookup, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
}

if len(resp.Answer) == 0 {
Expand Down Expand Up @@ -1521,10 +1561,14 @@ func (d *DNSServer) preparedQueryLookup(cfg *dnsConfig, datacenter, query string

// Add various responses depending on the request.
qType := req.Question[0].Qtype

// This serviceLookup only needs the datacenter field populated,
// because peering is not supported with prepared queries.
lookup := serviceLookup{Datacenter: out.Datacenter}
if qType == dns.TypeSRV {
d.serviceSRVRecords(cfg, out.Datacenter, out.Nodes, req, resp, ttl, maxRecursionLevel)
d.serviceSRVRecords(cfg, lookup, out.Nodes, req, resp, ttl, maxRecursionLevel)
} else {
d.serviceNodeRecords(cfg, out.Datacenter, out.Nodes, req, resp, ttl, maxRecursionLevel)
d.serviceNodeRecords(cfg, lookup, out.Nodes, req, resp, ttl, maxRecursionLevel)
}

if len(resp.Answer) == 0 {
Expand Down Expand Up @@ -1575,15 +1619,15 @@ RPC:
}

// serviceNodeRecords is used to add the node records for a service lookup
func (d *DNSServer) serviceNodeRecords(cfg *dnsConfig, dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
func (d *DNSServer) serviceNodeRecords(cfg *dnsConfig, lookup serviceLookup, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
handled := make(map[string]struct{})
var answerCNAME []dns.RR = nil

count := 0
for _, node := range nodes {
// Add the node record
had_answer := false
records, _ := d.nodeServiceRecords(dc, node, req, ttl, cfg, maxRecursionLevel)
records, _ := d.nodeServiceRecords(lookup, node, req, ttl, cfg, maxRecursionLevel)
if len(records) == 0 {
continue
}
Expand Down Expand Up @@ -1666,15 +1710,20 @@ func findWeight(node structs.CheckServiceNode) int {
}
}

func (d *DNSServer) encodeIPAsFqdn(questionName string, dc string, ip net.IP) string {
func (d *DNSServer) encodeIPAsFqdn(questionName string, lookup serviceLookup, ip net.IP) string {
ipv4 := ip.To4()
respDomain := d.getResponseDomain(questionName)
ipStr := hex.EncodeToString(ip)
if ipv4 != nil {
ipStr := hex.EncodeToString(ip)
return fmt.Sprintf("%s.addr.%s.%s", ipStr[len(ipStr)-(net.IPv4len*2):], dc, respDomain)
} else {
return fmt.Sprintf("%s.addr.%s.%s", hex.EncodeToString(ip), dc, respDomain)
ipStr = ipStr[len(ipStr)-(net.IPv4len*2):]
}
if lookup.PeerName != "" {
// Exclude the datacenter from the FQDN on the addr for peers.
// This technically makes no difference, since the addr endpoint ignores the DC
// component of the request, but do it anyway for a less confusing experience.
return fmt.Sprintf("%s.addr.%s", ipStr, respDomain)
}
return fmt.Sprintf("%s.addr.%s.%s", ipStr, lookup.Datacenter, respDomain)
}

func makeARecord(qType uint16, ip net.IP, ttl time.Duration) dns.RR {
Expand Down Expand Up @@ -1753,16 +1802,16 @@ func (d *DNSServer) makeRecordFromNode(node *structs.Node, qType uint16, qName s
// Craft dns records for a service
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the node IP
// Otherwise it will return a IN A record
func (d *DNSServer) makeRecordFromServiceNode(dc string, serviceNode structs.CheckServiceNode, addr net.IP, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
func (d *DNSServer) makeRecordFromServiceNode(lookup serviceLookup, serviceNode structs.CheckServiceNode, addr net.IP, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
q := req.Question[0]
respDomain := d.getResponseDomain(q.Name)

ipRecord := makeARecord(q.Qtype, addr, ttl)
if ipRecord == nil {
return nil, nil
}

if q.Qtype == dns.TypeSRV {
nodeFQDN := fmt.Sprintf("%s.node.%s.%s", serviceNode.Node.Node, dc, respDomain)
respDomain := d.getResponseDomain(q.Name)
nodeFQDN := nodeCanonicalDNSName(lookup, serviceNode.Node.Node, respDomain)
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Expand All @@ -1773,7 +1822,7 @@ func (d *DNSServer) makeRecordFromServiceNode(dc string, serviceNode structs.Che
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(dc, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: nodeFQDN,
},
}
Expand All @@ -1789,15 +1838,15 @@ func (d *DNSServer) makeRecordFromServiceNode(dc string, serviceNode structs.Che
// Craft dns records for an IP
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the IP
// Otherwise it will return a IN A record
func (d *DNSServer) makeRecordFromIP(dc string, addr net.IP, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
func (d *DNSServer) makeRecordFromIP(lookup serviceLookup, addr net.IP, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
q := req.Question[0]
ipRecord := makeARecord(q.Qtype, addr, ttl)
if ipRecord == nil {
return nil, nil
}

if q.Qtype == dns.TypeSRV {
ipFQDN := d.encodeIPAsFqdn(q.Name, dc, addr)
ipFQDN := d.encodeIPAsFqdn(q.Name, lookup, addr)
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Expand All @@ -1808,7 +1857,7 @@ func (d *DNSServer) makeRecordFromIP(dc string, addr net.IP, serviceNode structs
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(dc, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: ipFQDN,
},
}
Expand All @@ -1824,7 +1873,7 @@ func (d *DNSServer) makeRecordFromIP(dc string, addr net.IP, serviceNode structs
// Craft dns records for an FQDN
// In case of an SRV query the answer will be a IN SRV and additional data will store an IN A to the IP
// Otherwise it will return a CNAME and a IN A record
func (d *DNSServer) makeRecordFromFQDN(dc string, fqdn string, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
func (d *DNSServer) makeRecordFromFQDN(lookup serviceLookup, fqdn string, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
edns := req.IsEdns0() != nil
q := req.Question[0]

Expand Down Expand Up @@ -1857,7 +1906,7 @@ MORE_REC:
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(dc, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: dns.Fqdn(fqdn),
},
}
Expand All @@ -1879,7 +1928,7 @@ MORE_REC:
return answers, nil
}

func (d *DNSServer) nodeServiceRecords(dc string, node structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
func (d *DNSServer) nodeServiceRecords(lookup serviceLookup, node structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
addrTranslate := TranslateAddressAcceptDomain
if req.Question[0].Qtype == dns.TypeA {
addrTranslate |= TranslateAddressAcceptIPv4
Expand All @@ -1889,7 +1938,9 @@ func (d *DNSServer) nodeServiceRecords(dc string, node structs.CheckServiceNode,
addrTranslate |= TranslateAddressAcceptAny
}

serviceAddr := d.agent.TranslateServiceAddress(dc, node.Service.Address, node.Service.TaggedAddresses, addrTranslate)
// The datacenter should be empty during translation if it is a peering lookup.
// This should be fine because we should always prefer the WAN address.
serviceAddr := d.agent.TranslateServiceAddress(lookup.Datacenter, node.Service.Address, node.Service.TaggedAddresses, addrTranslate)
nodeAddr := d.agent.TranslateAddress(node.Node.Datacenter, node.Node.Address, node.Node.TaggedAddresses, addrTranslate)
if serviceAddr == "" && nodeAddr == "" {
return nil, nil
Expand All @@ -1902,30 +1953,30 @@ func (d *DNSServer) nodeServiceRecords(dc string, node structs.CheckServiceNode,
if serviceAddr == "" && nodeIPAddr != nil {
if node.Node.Address != nodeAddr {
// Do not CNAME node address in case of WAN address
return d.makeRecordFromIP(dc, nodeIPAddr, node, req, ttl)
return d.makeRecordFromIP(lookup, nodeIPAddr, node, req, ttl)
}

return d.makeRecordFromServiceNode(dc, node, nodeIPAddr, req, ttl)
return d.makeRecordFromServiceNode(lookup, node, nodeIPAddr, req, ttl)
}

// There is no service address and the node address is a FQDN (external service)
if serviceAddr == "" {
return d.makeRecordFromFQDN(dc, nodeAddr, node, req, ttl, cfg, maxRecursionLevel)
return d.makeRecordFromFQDN(lookup, nodeAddr, node, req, ttl, cfg, maxRecursionLevel)
}

// The service address is an IP
if serviceIPAddr != nil {
return d.makeRecordFromIP(dc, serviceIPAddr, node, req, ttl)
return d.makeRecordFromIP(lookup, serviceIPAddr, node, req, ttl)
}

// If the service address is a CNAME for the service we are looking
// for then use the node address.
if dns.Fqdn(serviceAddr) == req.Question[0].Name && nodeIPAddr != nil {
return d.makeRecordFromServiceNode(dc, node, nodeIPAddr, req, ttl)
return d.makeRecordFromServiceNode(lookup, node, nodeIPAddr, req, ttl)
}

// The service address is a FQDN (external service)
return d.makeRecordFromFQDN(dc, serviceAddr, node, req, ttl, cfg, maxRecursionLevel)
return d.makeRecordFromFQDN(lookup, serviceAddr, node, req, ttl, cfg, maxRecursionLevel)
}

func (d *DNSServer) generateMeta(qName string, node *structs.Node, ttl time.Duration) []dns.RR {
Expand All @@ -1950,28 +2001,31 @@ func (d *DNSServer) generateMeta(qName string, node *structs.Node, ttl time.Dura
}

// serviceARecords is used to add the SRV records for a service lookup
func (d *DNSServer) serviceSRVRecords(cfg *dnsConfig, dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
func (d *DNSServer) serviceSRVRecords(cfg *dnsConfig, lookup serviceLookup, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
handled := make(map[string]struct{})

for _, node := range nodes {
// Avoid duplicate entries, possible if a node has
// the same service the same port, etc.
serviceAddress := d.agent.TranslateServiceAddress(dc, node.Service.Address, node.Service.TaggedAddresses, TranslateAddressAcceptAny)
servicePort := d.agent.TranslateServicePort(dc, node.Service.Port, node.Service.TaggedAddresses)

// The datacenter should be empty during translation if it is a peering lookup.
// This should be fine because we should always prefer the WAN address.
serviceAddress := d.agent.TranslateServiceAddress(lookup.Datacenter, node.Service.Address, node.Service.TaggedAddresses, TranslateAddressAcceptAny)
servicePort := d.agent.TranslateServicePort(lookup.Datacenter, node.Service.Port, node.Service.TaggedAddresses)
tuple := fmt.Sprintf("%s:%s:%d", node.Node.Node, serviceAddress, servicePort)
if _, ok := handled[tuple]; ok {
continue
}
handled[tuple] = struct{}{}

answers, extra := d.nodeServiceRecords(dc, node, req, ttl, cfg, maxRecursionLevel)
answers, extra := d.nodeServiceRecords(lookup, node, req, ttl, cfg, maxRecursionLevel)

respDomain := d.getResponseDomain(req.Question[0].Name)
resp.Answer = append(resp.Answer, answers...)
resp.Extra = append(resp.Extra, extra...)

if cfg.NodeMetaTXT {
resp.Extra = append(resp.Extra, d.generateMeta(fmt.Sprintf("%s.node.%s.%s", node.Node.Node, dc, respDomain), node.Node, ttl)...)
resp.Extra = append(resp.Extra, d.generateMeta(nodeCanonicalDNSName(lookup, node.Node.Node, respDomain), node.Node, ttl)...)
}
}
}
Expand Down
Loading

0 comments on commit 95dc0c7

Please sign in to comment.