From 2ebd4811600d5ec05125785ab8280d327f7490fb Mon Sep 17 00:00:00 2001 From: "yang.li" Date: Mon, 6 Sep 2021 17:00:53 +0800 Subject: [PATCH] [WIP] Add exceptCIDRs for egress to avoid SNAT Avoid SNAT for assigned IP block with assigned protocol {and port}? Fixes: #2707 Signed-off-by: Yang Li yang.li@transwarp.io --- build/yamls/base/crds.yml | 10 ++++++ .../controller/egress/egress_controller.go | 32 ++++++++++++++++--- pkg/agent/openflow/client.go | 6 ++-- pkg/agent/openflow/pipeline.go | 25 +++++++++++---- pkg/apis/crd/v1alpha2/types.go | 1 + 5 files changed, 61 insertions(+), 13 deletions(-) diff --git a/build/yamls/base/crds.yml b/build/yamls/base/crds.yml index f69807a46c4..97e3be2b477 100644 --- a/build/yamls/base/crds.yml +++ b/build/yamls/base/crds.yml @@ -79,6 +79,12 @@ spec: oneOf: - format: ipv4 - format: ipv6 + exceptCIDRs: + items: + oneOf: + - format: cidr + type: string + type: array externalIPPool: type: string status: @@ -94,6 +100,10 @@ spec: - jsonPath: .metadata.creationTimestamp name: Age type: date + - description: The excpet CIDRs for avoid SNAT + jsonPath: .spec.exceptCIDRs + name: ExceptCIDRs + type: string - description: The Owner Node of egress IP jsonPath: .status.egressNode name: Node diff --git a/pkg/agent/controller/egress/egress_controller.go b/pkg/agent/controller/egress/egress_controller.go index 3061caeb413..7431c29aa69 100644 --- a/pkg/agent/controller/egress/egress_controller.go +++ b/pkg/agent/controller/egress/egress_controller.go @@ -19,6 +19,7 @@ import ( "fmt" "net" "reflect" + "sort" "strings" "sync" "time" @@ -81,6 +82,8 @@ type egressState struct { egressIP string // The actual datapath mark of this Egress. Used to check if the mark changes since last process. mark uint32 + // The except CIDRs for bypass SNAT flows + exceptCIDRs []string // The actual openflow ports for which we have installed SNAT rules. Used to identify stale openflow ports when // updating or deleting an Egress. ofPorts sets.Int32 @@ -464,11 +467,12 @@ func (c *EgressController) deleteEgressState(egressName string) { delete(c.egressStates, egressName) } -func (c *EgressController) newEgressState(egressName string, egressIP string) *egressState { +func (c *EgressController) newEgressState(egressName string, egressIP string, exceptCIDRs []string) *egressState { c.egressStatesMutex.Lock() defer c.egressStatesMutex.Unlock() state := &egressState{ egressIP: egressIP, + exceptCIDRs: exceptCIDRs, ofPorts: sets.NewInt32(), pods: sets.NewString(), } @@ -592,7 +596,7 @@ func (c *EgressController) syncEgress(egressName string) error { return nil } if !exist { - eState = c.newEgressState(egressName, egress.Spec.EgressIP) + eState = c.newEgressState(egressName, egress.Spec.EgressIP, egress.Spec.ExceptCIDRs) } localNodeSelected, err := c.cluster.ShouldSelectEgress(egress) @@ -619,12 +623,13 @@ func (c *EgressController) syncEgress(egressName string) error { // If the mark changes, uninstall all of the Egress's Pod flows first, then installs them with new mark. // It could happen when the Egress IP is added to or removed from the Node. - if eState.mark != mark { + if eState.mark != mark || !Equal(eState.exceptCIDRs, egress.Spec.ExceptCIDRs) { // Uninstall all of its Pod flows. if err := c.uninstallPodFlows(egressName, eState, eState.ofPorts, eState.pods); err != nil { return err } eState.mark = mark + eState.exceptCIDRs = egress.Spec.ExceptCIDRs } if err := c.updateEgressStatus(egress, c.localIPDetector.IsLocalIP(egress.Spec.EgressIP)); err != nil { @@ -647,6 +652,11 @@ func (c *EgressController) syncEgress(egressName string) error { }() egressIP := net.ParseIP(eState.egressIP) + exceptCIDRs := []*net.IPNet{} + for _, exceptCIDR := range eState.exceptCIDRs { + _, excidr, _ := net.ParseCIDR(exceptCIDR) + exceptCIDRs = append(exceptCIDRs, excidr) + } // Install SNAT flows for desired Pods. for pod := range pods { eState.pods.Insert(pod) @@ -671,7 +681,7 @@ func (c *EgressController) syncEgress(egressName string) error { staleOFPorts.Delete(ofPort) continue } - if err := c.ofClient.InstallPodSNATFlows(uint32(ofPort), egressIP, mark); err != nil { + if err := c.ofClient.InstallPodSNATFlows(uint32(ofPort), egressIP, mark, exceptCIDRs); err != nil { return err } eState.ofPorts.Insert(ofPort) @@ -872,3 +882,17 @@ func (c *EgressController) deleteEgressGroup(group *cpv1b2.EgressGroup) { delete(c.egressGroups, group.Name) c.queue.Add(group.Name) } + +func Equal(a, b []string) bool { + if len(a) != len(b) { + return false + } + sort.Strings(a) + sort.Strings(b) + for i, v := range a { + if v != b[i] { + return false + } + } + return true +} diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 5803ba56813..eff464a53e2 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -176,7 +176,7 @@ type Client interface { // tunnel destination, and the packets should be SNAT'd on the remote // Node. As of now, a Pod can be configured to use only a single SNAT // IP in a single address family (IPv4 or IPv6). - InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error + InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32, exceptCIDRs []*net.IPNet) error // UninstallPodSNATFlows removes the SNAT flows for the local Pod. UninstallPodSNATFlows(ofPort uint32) error @@ -806,8 +806,8 @@ func (c *client) UninstallSNATMarkFlows(mark uint32) error { return c.deleteFlows(c.snatFlowCache, cacheKey) } -func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error { - flows := []binding.Flow{c.snatRuleFlow(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC)} +func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32, exceptCIDRs []*net.IPNet) error { + flows := c.snatRuleFlows(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC, exceptCIDRs) cacheKey := fmt.Sprintf("p%x", ofPort) c.replayMutex.RLock() defer c.replayMutex.RUnlock() diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 7adf2cc0a77..abde4ba71fb 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -1927,26 +1927,38 @@ func (c *client) snatIPFromTunnelFlow(snatIP net.IP, mark uint32) binding.Flow { Done() } -// snatRuleFlow generates a flow that applies the SNAT rule for a local Pod. If +// snatRuleFlows generates flows that applies the SNAT rule for a local Pod. If // the SNAT IP exists on the local Node, it sets the packet mark with the ID of // the SNAT IP, for the traffic from the ofPort to external; if the SNAT IP is // on a remote Node, it tunnels the packets to the SNAT IP. -func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr) binding.Flow { +func (c *client) snatRuleFlows(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr, exceptCIDRs []*net.IPNet) []binding.Flow { ipProto := getIPProtocol(snatIP) snatTable := c.pipeline[snatTable] + l3FwdTable := c.pipeline[l3ForwardingTable] + nextTable := l3FwdTable.GetNext() + snatFlows := []binding.Flow{} if snatMark != 0 { // Local SNAT IP. - return snatTable.BuildFlow(priorityNormal). + return append(snatFlows, snatTable.BuildFlow(priorityNormal). MatchProtocol(ipProto). MatchCTStateNew(true).MatchCTStateTrk(true). MatchInPort(ofPort). Action().LoadPktMarkRange(snatMark, snatPktMarkRange). Action().GotoTable(snatTable.GetNext()). Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done() + Done()) + } + for _, exceptCIDR := range exceptCIDRs { + snatFlows = append(snatFlows, l3FwdTable.BuildFlow(priorityNormal). + MatchProtocol(ipProto). + MatchInPort(ofPort). + MatchDstIPNet(*exceptCIDR). + Action().GotoTable(nextTable). + Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). + Done()) } // SNAT IP should be on a remote Node. - return snatTable.BuildFlow(priorityNormal). + snatFlows = append(snatFlows, snatTable.BuildFlow(priorityNormal). MatchProtocol(ipProto). MatchInPort(ofPort). Action().SetSrcMAC(localGatewayMAC). @@ -1955,7 +1967,8 @@ func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, loc Action().SetTunnelDst(snatIP). Action().GotoTable(l3DecTTLTable). Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()). - Done() + Done()) + return snatFlows } // loadBalancerServiceFromOutsideFlow generates the flow to forward LoadBalancer service traffic from outside node diff --git a/pkg/apis/crd/v1alpha2/types.go b/pkg/apis/crd/v1alpha2/types.go index 437c3058e01..2da8c865a98 100644 --- a/pkg/apis/crd/v1alpha2/types.go +++ b/pkg/apis/crd/v1alpha2/types.go @@ -226,6 +226,7 @@ type EgressSpec struct { // If it is non-empty, the EgressIP will be assigned to a Node specified by the pool automatically and will failover // to a different Node when the Node becomes unreachable. ExternalIPPool string `json:"externalIPPool"` + ExceptCIDRs []string `json:"exceptCIDRs"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object