From f8fac26833a98f01a955707d746a3e00cd44a192 Mon Sep 17 00:00:00 2001 From: "yang.li" Date: Mon, 6 Sep 2021 17:00:53 +0800 Subject: [PATCH] [WIP] Add exceptCIDRs for egress to avoid SNAT Avoid SNAT for assigned IP block with assigned protocol {and port}? Fixes: #2707 Signed-off-by: Yang Li yang.li@transwarp.io --- build/yamls/base/crds.yml | 8 ++++++ .../controller/egress/egress_controller.go | 25 ++++++++++++++--- pkg/agent/openflow/client.go | 7 ++--- pkg/agent/openflow/pipeline.go | 27 ++++++++++++++----- pkg/apis/crd/v1alpha2/types.go | 5 ++++ 5 files changed, 59 insertions(+), 13 deletions(-) diff --git a/build/yamls/base/crds.yml b/build/yamls/base/crds.yml index f69807a46c4..201ecc4958d 100644 --- a/build/yamls/base/crds.yml +++ b/build/yamls/base/crds.yml @@ -79,6 +79,14 @@ spec: oneOf: - format: ipv4 - format: ipv6 + excepts: + items: + properties: + cidr: + type: string + format: cidr + type: object + type: array externalIPPool: type: string status: diff --git a/pkg/agent/controller/egress/egress_controller.go b/pkg/agent/controller/egress/egress_controller.go index 3061caeb413..7b1480b6ed3 100644 --- a/pkg/agent/controller/egress/egress_controller.go +++ b/pkg/agent/controller/egress/egress_controller.go @@ -19,6 +19,7 @@ import ( "fmt" "net" "reflect" + _ "sort" "strings" "sync" "time" @@ -81,6 +82,8 @@ type egressState struct { egressIP string // The actual datapath mark of this Egress. Used to check if the mark changes since last process. mark uint32 + // The except Info for bypass SNAT flows + excepts []crdv1a2.Except // The actual openflow ports for which we have installed SNAT rules. Used to identify stale openflow ports when // updating or deleting an Egress. ofPorts sets.Int32 @@ -464,11 +467,12 @@ func (c *EgressController) deleteEgressState(egressName string) { delete(c.egressStates, egressName) } -func (c *EgressController) newEgressState(egressName string, egressIP string) *egressState { +func (c *EgressController) newEgressState(egressName string, egressIP string, excepts []crdv1a2.Except) *egressState { c.egressStatesMutex.Lock() defer c.egressStatesMutex.Unlock() state := &egressState{ egressIP: egressIP, + excepts: excepts, ofPorts: sets.NewInt32(), pods: sets.NewString(), } @@ -592,7 +596,7 @@ func (c *EgressController) syncEgress(egressName string) error { return nil } if !exist { - eState = c.newEgressState(egressName, egress.Spec.EgressIP) + eState = c.newEgressState(egressName, egress.Spec.EgressIP, egress.Spec.Excepts) } localNodeSelected, err := c.cluster.ShouldSelectEgress(egress) @@ -619,12 +623,13 @@ func (c *EgressController) syncEgress(egressName string) error { // If the mark changes, uninstall all of the Egress's Pod flows first, then installs them with new mark. // It could happen when the Egress IP is added to or removed from the Node. - if eState.mark != mark { + if eState.mark != mark || Equal(eState.excepts, egress.Spec.Excepts) { // Uninstall all of its Pod flows. if err := c.uninstallPodFlows(egressName, eState, eState.ofPorts, eState.pods); err != nil { return err } eState.mark = mark + eState.excepts = egress.Spec.Excepts } if err := c.updateEgressStatus(egress, c.localIPDetector.IsLocalIP(egress.Spec.EgressIP)); err != nil { @@ -671,7 +676,7 @@ func (c *EgressController) syncEgress(egressName string) error { staleOFPorts.Delete(ofPort) continue } - if err := c.ofClient.InstallPodSNATFlows(uint32(ofPort), egressIP, mark); err != nil { + if err := c.ofClient.InstallPodSNATFlows(uint32(ofPort), egressIP, mark, eState.excepts); err != nil { return err } eState.ofPorts.Insert(ofPort) @@ -872,3 +877,15 @@ func (c *EgressController) deleteEgressGroup(group *cpv1b2.EgressGroup) { delete(c.egressGroups, group.Name) c.queue.Add(group.Name) } + +func Equal(a, b []crdv1a2.Except) bool { + if len(a) != len(b) { + return false + } + for i, v := range a { + if v != b[i] { + return false + } + } + return true +} diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 5803ba56813..29a2385c9c4 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -26,6 +26,7 @@ import ( "antrea.io/antrea/pkg/agent/openflow/cookie" "antrea.io/antrea/pkg/agent/types" "antrea.io/antrea/pkg/agent/util" + crdv1a2 "antrea.io/antrea/pkg/apis/crd/v1alpha2" binding "antrea.io/antrea/pkg/ovs/openflow" utilip "antrea.io/antrea/pkg/util/ip" "antrea.io/antrea/third_party/proxy" @@ -176,7 +177,7 @@ type Client interface { // tunnel destination, and the packets should be SNAT'd on the remote // Node. As of now, a Pod can be configured to use only a single SNAT // IP in a single address family (IPv4 or IPv6). - InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error + InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32, excepts []crdv1a2.Except) error // UninstallPodSNATFlows removes the SNAT flows for the local Pod. UninstallPodSNATFlows(ofPort uint32) error @@ -806,8 +807,8 @@ func (c *client) UninstallSNATMarkFlows(mark uint32) error { return c.deleteFlows(c.snatFlowCache, cacheKey) } -func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error { - flows := []binding.Flow{c.snatRuleFlow(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC)} +func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32, excepts []crdv1a2.Except) error { + flows := c.snatRuleFlows(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC, excepts) cacheKey := fmt.Sprintf("p%x", ofPort) c.replayMutex.RLock() defer c.replayMutex.RUnlock() diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 7adf2cc0a77..6d1ca34ac3c 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -33,6 +33,7 @@ import ( "antrea.io/antrea/pkg/agent/metrics" "antrea.io/antrea/pkg/agent/openflow/cookie" "antrea.io/antrea/pkg/agent/types" + crdv1a2 "antrea.io/antrea/pkg/apis/crd/v1alpha2" binding "antrea.io/antrea/pkg/ovs/openflow" "antrea.io/antrea/pkg/ovs/ovsconfig" "antrea.io/antrea/pkg/ovs/ovsctl" @@ -1927,26 +1928,39 @@ func (c *client) snatIPFromTunnelFlow(snatIP net.IP, mark uint32) binding.Flow { Done() } -// snatRuleFlow generates a flow that applies the SNAT rule for a local Pod. If +// snatRuleFlows generates flows that applies the SNAT rule for a local Pod. If // the SNAT IP exists on the local Node, it sets the packet mark with the ID of // the SNAT IP, for the traffic from the ofPort to external; if the SNAT IP is // on a remote Node, it tunnels the packets to the SNAT IP. -func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr) binding.Flow { +func (c *client) snatRuleFlows(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr, excepts []crdv1a2.Except) []binding.Flow { ipProto := getIPProtocol(snatIP) snatTable := c.pipeline[snatTable] + l3FwdTable := c.pipeline[l3ForwardingTable] + nextTable := l3FwdTable.GetNext() + snatFlows := []binding.Flow{} if snatMark != 0 { // Local SNAT IP. - return snatTable.BuildFlow(priorityNormal). + return append(snatFlows, snatTable.BuildFlow(priorityNormal). MatchProtocol(ipProto). MatchCTStateNew(true).MatchCTStateTrk(true). MatchInPort(ofPort). Action().LoadPktMarkRange(snatMark, snatPktMarkRange). Action().GotoTable(snatTable.GetNext()). Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done() + Done()) + } + for _, except := range excepts { + _, excidr, _ := net.ParseCIDR(except.CIDR) + snatFlows = append(snatFlows, l3FwdTable.BuildFlow(priorityNormal). + MatchInPort(ofPort). + MatchProtocol(ipProto). + MatchDstIPNet(*excidr). + Action().GotoTable(nextTable). + Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). + Done()) } // SNAT IP should be on a remote Node. - return snatTable.BuildFlow(priorityNormal). + snatFlows = append(snatFlows, snatTable.BuildFlow(priorityNormal). MatchProtocol(ipProto). MatchInPort(ofPort). Action().SetSrcMAC(localGatewayMAC). @@ -1955,7 +1969,8 @@ func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, loc Action().SetTunnelDst(snatIP). Action().GotoTable(l3DecTTLTable). Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done() + Done()) + return snatFlows } // loadBalancerServiceFromOutsideFlow generates the flow to forward LoadBalancer service traffic from outside node diff --git a/pkg/apis/crd/v1alpha2/types.go b/pkg/apis/crd/v1alpha2/types.go index 437c3058e01..0cd5091d525 100644 --- a/pkg/apis/crd/v1alpha2/types.go +++ b/pkg/apis/crd/v1alpha2/types.go @@ -212,6 +212,10 @@ type EgressStatus struct { EgressNode string `json:"egressNode"` } +type Except struct { + CIDR string `json:"cidr"` +} + // EgressSpec defines the desired state for Egress. type EgressSpec struct { // AppliedTo selects Pods to which the Egress will be applied. @@ -226,6 +230,7 @@ type EgressSpec struct { // If it is non-empty, the EgressIP will be assigned to a Node specified by the pool automatically and will failover // to a different Node when the Node becomes unreachable. ExternalIPPool string `json:"externalIPPool"` + Excepts []Except `json:"excepts"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object