Skip to content

Commit

Permalink
[WIP] Add exceptCIDRs for egress to avoid SNAT
Browse files Browse the repository at this point in the history
Avoid SNAT for assigned IP block with assigned protocol {and port}?

Fixes: antrea-io#2707

Signed-off-by: Yang Li [email protected]
  • Loading branch information
leonstack committed Sep 7, 2021
1 parent 589e1f7 commit 2ebd481
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 13 deletions.
10 changes: 10 additions & 0 deletions build/yamls/base/crds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ spec:
oneOf:
- format: ipv4
- format: ipv6
exceptCIDRs:
items:
oneOf:
- format: cidr
type: string
type: array
externalIPPool:
type: string
status:
Expand All @@ -94,6 +100,10 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
- description: The excpet CIDRs for avoid SNAT
jsonPath: .spec.exceptCIDRs
name: ExceptCIDRs
type: string
- description: The Owner Node of egress IP
jsonPath: .status.egressNode
name: Node
Expand Down
32 changes: 28 additions & 4 deletions pkg/agent/controller/egress/egress_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"fmt"
"net"
"reflect"
"sort"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -81,6 +82,8 @@ type egressState struct {
egressIP string
// The actual datapath mark of this Egress. Used to check if the mark changes since last process.
mark uint32
// The except CIDRs for bypass SNAT flows
exceptCIDRs []string
// The actual openflow ports for which we have installed SNAT rules. Used to identify stale openflow ports when
// updating or deleting an Egress.
ofPorts sets.Int32
Expand Down Expand Up @@ -464,11 +467,12 @@ func (c *EgressController) deleteEgressState(egressName string) {
delete(c.egressStates, egressName)
}

func (c *EgressController) newEgressState(egressName string, egressIP string) *egressState {
func (c *EgressController) newEgressState(egressName string, egressIP string, exceptCIDRs []string) *egressState {
c.egressStatesMutex.Lock()
defer c.egressStatesMutex.Unlock()
state := &egressState{
egressIP: egressIP,
exceptCIDRs: exceptCIDRs,
ofPorts: sets.NewInt32(),
pods: sets.NewString(),
}
Expand Down Expand Up @@ -592,7 +596,7 @@ func (c *EgressController) syncEgress(egressName string) error {
return nil
}
if !exist {
eState = c.newEgressState(egressName, egress.Spec.EgressIP)
eState = c.newEgressState(egressName, egress.Spec.EgressIP, egress.Spec.ExceptCIDRs)
}

localNodeSelected, err := c.cluster.ShouldSelectEgress(egress)
Expand All @@ -619,12 +623,13 @@ func (c *EgressController) syncEgress(egressName string) error {

// If the mark changes, uninstall all of the Egress's Pod flows first, then installs them with new mark.
// It could happen when the Egress IP is added to or removed from the Node.
if eState.mark != mark {
if eState.mark != mark || !Equal(eState.exceptCIDRs, egress.Spec.ExceptCIDRs) {
// Uninstall all of its Pod flows.
if err := c.uninstallPodFlows(egressName, eState, eState.ofPorts, eState.pods); err != nil {
return err
}
eState.mark = mark
eState.exceptCIDRs = egress.Spec.ExceptCIDRs
}

if err := c.updateEgressStatus(egress, c.localIPDetector.IsLocalIP(egress.Spec.EgressIP)); err != nil {
Expand All @@ -647,6 +652,11 @@ func (c *EgressController) syncEgress(egressName string) error {
}()

egressIP := net.ParseIP(eState.egressIP)
exceptCIDRs := []*net.IPNet{}
for _, exceptCIDR := range eState.exceptCIDRs {
_, excidr, _ := net.ParseCIDR(exceptCIDR)
exceptCIDRs = append(exceptCIDRs, excidr)
}
// Install SNAT flows for desired Pods.
for pod := range pods {
eState.pods.Insert(pod)
Expand All @@ -671,7 +681,7 @@ func (c *EgressController) syncEgress(egressName string) error {
staleOFPorts.Delete(ofPort)
continue
}
if err := c.ofClient.InstallPodSNATFlows(uint32(ofPort), egressIP, mark); err != nil {
if err := c.ofClient.InstallPodSNATFlows(uint32(ofPort), egressIP, mark, exceptCIDRs); err != nil {
return err
}
eState.ofPorts.Insert(ofPort)
Expand Down Expand Up @@ -872,3 +882,17 @@ func (c *EgressController) deleteEgressGroup(group *cpv1b2.EgressGroup) {
delete(c.egressGroups, group.Name)
c.queue.Add(group.Name)
}

func Equal(a, b []string) bool {
if len(a) != len(b) {
return false
}
sort.Strings(a)
sort.Strings(b)
for i, v := range a {
if v != b[i] {
return false
}
}
return true
}
6 changes: 3 additions & 3 deletions pkg/agent/openflow/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ type Client interface {
// tunnel destination, and the packets should be SNAT'd on the remote
// Node. As of now, a Pod can be configured to use only a single SNAT
// IP in a single address family (IPv4 or IPv6).
InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error
InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32, exceptCIDRs []*net.IPNet) error

// UninstallPodSNATFlows removes the SNAT flows for the local Pod.
UninstallPodSNATFlows(ofPort uint32) error
Expand Down Expand Up @@ -806,8 +806,8 @@ func (c *client) UninstallSNATMarkFlows(mark uint32) error {
return c.deleteFlows(c.snatFlowCache, cacheKey)
}

func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error {
flows := []binding.Flow{c.snatRuleFlow(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC)}
func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32, exceptCIDRs []*net.IPNet) error {
flows := c.snatRuleFlows(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC, exceptCIDRs)
cacheKey := fmt.Sprintf("p%x", ofPort)
c.replayMutex.RLock()
defer c.replayMutex.RUnlock()
Expand Down
25 changes: 19 additions & 6 deletions pkg/agent/openflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -1927,26 +1927,38 @@ func (c *client) snatIPFromTunnelFlow(snatIP net.IP, mark uint32) binding.Flow {
Done()
}

// snatRuleFlow generates a flow that applies the SNAT rule for a local Pod. If
// snatRuleFlows generates flows that applies the SNAT rule for a local Pod. If
// the SNAT IP exists on the local Node, it sets the packet mark with the ID of
// the SNAT IP, for the traffic from the ofPort to external; if the SNAT IP is
// on a remote Node, it tunnels the packets to the SNAT IP.
func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr) binding.Flow {
func (c *client) snatRuleFlows(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr, exceptCIDRs []*net.IPNet) []binding.Flow {
ipProto := getIPProtocol(snatIP)
snatTable := c.pipeline[snatTable]
l3FwdTable := c.pipeline[l3ForwardingTable]
nextTable := l3FwdTable.GetNext()
snatFlows := []binding.Flow{}
if snatMark != 0 {
// Local SNAT IP.
return snatTable.BuildFlow(priorityNormal).
return append(snatFlows, snatTable.BuildFlow(priorityNormal).
MatchProtocol(ipProto).
MatchCTStateNew(true).MatchCTStateTrk(true).
MatchInPort(ofPort).
Action().LoadPktMarkRange(snatMark, snatPktMarkRange).
Action().GotoTable(snatTable.GetNext()).
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done()
Done())
}
for _, exceptCIDR := range exceptCIDRs {
snatFlows = append(snatFlows, l3FwdTable.BuildFlow(priorityNormal).
MatchProtocol(ipProto).
MatchInPort(ofPort).
MatchDstIPNet(*exceptCIDR).
Action().GotoTable(nextTable).
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done())
}
// SNAT IP should be on a remote Node.
return snatTable.BuildFlow(priorityNormal).
snatFlows = append(snatFlows, snatTable.BuildFlow(priorityNormal).
MatchProtocol(ipProto).
MatchInPort(ofPort).
Action().SetSrcMAC(localGatewayMAC).
Expand All @@ -1955,7 +1967,8 @@ func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, loc
Action().SetTunnelDst(snatIP).
Action().GotoTable(l3DecTTLTable).
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done()
Done())
return snatFlows
}

// loadBalancerServiceFromOutsideFlow generates the flow to forward LoadBalancer service traffic from outside node
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/crd/v1alpha2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ type EgressSpec struct {
// If it is non-empty, the EgressIP will be assigned to a Node specified by the pool automatically and will failover
// to a different Node when the Node becomes unreachable.
ExternalIPPool string `json:"externalIPPool"`
ExceptCIDRs []string `json:"exceptCIDRs"`
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
Expand Down

0 comments on commit 2ebd481

Please sign in to comment.