Skip to content

Commit

Permalink
Fix the issue of local probe bypassing flows on Windows
Browse files Browse the repository at this point in the history
When proxyAll is enabled, kube-proxy can be replaced by AntreaProxy, then
Service traffic and non-Service traffic can be distinguished by ServiceCTMark
and NotServiceCTMark. Service traffic with ServiceCTMark should not bypass
Network Policies, and non-Service traffic generated by kubelet with
NotServiceCTMark should bypass Network Policies.

Signed-off-by: Hongliang Liu <[email protected]>
  • Loading branch information
hongliangl committed Apr 20, 2022
1 parent 5905e98 commit 5fc6a24
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 23 deletions.
3 changes: 2 additions & 1 deletion pkg/agent/openflow/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,8 @@ func (c *client) generatePipelines() {
c.networkConfig,
c.ovsDatapathType,
c.connectUplinkToBridge,
c.enableMulticast)
c.enableMulticast,
c.proxyAll)
c.activatedFeatures = append(c.activatedFeatures, c.featurePodConnectivity)
c.traceableFeatures = append(c.traceableFeatures, c.featurePodConnectivity)

Expand Down
9 changes: 5 additions & 4 deletions pkg/agent/openflow/fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,12 @@ var (
FromGatewayCTMark = binding.NewCTMark(ConnSourceCTMarkField, gatewayVal)
FromBridgeCTMark = binding.NewCTMark(ConnSourceCTMarkField, bridgeVal)

// CTMark[4]: Mark to indicate DNAT is performed on the connection for Service.
// This CT mark is used in CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6.
ServiceCTMark = binding.NewOneBitCTMark(4)
// CTMark[4]: Marks to indicate whether DNAT is performed on the connection for Service.
// These CT marks are used in CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6.
ServiceCTMark = binding.NewOneBitCTMark(4)
NotServiceCTMark = binding.NewOneBitZeroCTMark(4)

// CTMark[5]: Mark to indicate SNAT should be performed on the connection for Service.
// CTMark[5]: Mark to indicate SNAT is performed on the connection for Service.
// This CT mark is only used in CtZone / CtZoneV6.
ConnSNATCTMark = binding.NewOneBitCTMark(5)

Expand Down
44 changes: 29 additions & 15 deletions pkg/agent/openflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -2094,34 +2094,48 @@ func (f *featureNetworkPolicy) dnsPacketInFlow(conjunctionID uint32) binding.Flo
Done()
}

// localProbeFlow generates the flow to forward locally generated packets to ConntrackCommitTable, bypassing ingress
// rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods.
// On Linux and when OVS kernel datapath is used, it identifies locally generated packets by matching the
// HostLocalSourceMark, otherwise it matches the source IP. The difference is because:
// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated
// traffic from kube-proxy proxied traffic.
// localProbeFlows generates the flows to forward locally generated request packets to stageConntrack directly, bypassing
// ingress rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods.
// On Linux and when OVS kernel datapath is used, the probe packets are identified by matching the HostLocalSourceMark.
// On Windows or when OVS userspace (netdev) datapath is used, we need a different approach because:
// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated traffic
// from kube-proxy proxied traffic.
// 2. pkt_mark field is not properly supported for OVS userspace (netdev) datapath.
// Note that there is a defect in the latter way that NodePort Service access by external clients will be masqueraded as
// a local gateway IP to bypass Network Policies. See https://github.com/antrea-io/antrea/issues/280.
// TODO: Fix it after replacing kube-proxy with AntreaProxy.
func (f *featurePodConnectivity) localProbeFlow() []binding.Flow {
// When proxyAll is disabled, the probe packets are identified by matching the source IP is the Antrea gateway IP;
// otherwise, the packets are identified by matching both the Antrea gateway IP and NotServiceCTMark. Note that, when
// proxyAll is disabled, currently there is no way to distinguish kubelet generated traffic from kube-proxy proxied traffic
// only by matching the Antrea gateway IP. There is a defect that NodePort Service access by external clients will be
// masqueraded as the Antrea gateway IP to bypass NetworkPolicies. See https://github.com/antrea-io/antrea/issues/280.
func (f *featurePodConnectivity) localProbeFlows() []binding.Flow {
cookieID := f.cookieAllocator.Request(f.category).Raw()
var flows []binding.Flow
if runtime.IsWindowsPlatform() || f.ovsDatapathType == ovsconfig.OVSDatapathNetdev {
var ctMarksToMatch []*binding.CtMark
if f.proxyAll {
ctMarksToMatch = append(ctMarksToMatch, NotServiceCTMark)
}
for ipProtocol, gatewayIP := range f.gatewayIPs {
flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh).
Cookie(cookieID).
MatchProtocol(ipProtocol).
MatchCTStateRpl(false).
MatchCTStateTrk(true).
MatchSrcIP(gatewayIP).
MatchCTMark(ctMarksToMatch...).
Action().GotoStage(stageConntrack).
Done())
}
} else {
flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh).
Cookie(cookieID).
MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark).
Action().GotoStage(stageConntrack).
Done())
for _, ipProtocol := range f.ipProtocols {
flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh).
Cookie(cookieID).
MatchProtocol(ipProtocol).
MatchCTStateRpl(false).
MatchCTStateTrk(true).
MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark).
Action().GotoStage(stageConntrack).
Done())
}
}
return flows
}
Expand Down
7 changes: 5 additions & 2 deletions pkg/agent/openflow/pod_connectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type featurePodConnectivity struct {
ctZoneSrcField *binding.RegField
ipCtZoneTypeRegMarks map[binding.Protocol]*binding.RegMark
enableMulticast bool
proxyAll bool

category cookie.Category
}
Expand All @@ -59,7 +60,8 @@ func newFeaturePodConnectivity(
networkConfig *config.NetworkConfig,
ovsDatapathType ovsconfig.OVSDatapathType,
connectUplinkToBridge bool,
enableMulticast bool) *featurePodConnectivity {
enableMulticast bool,
proxyAll bool) *featurePodConnectivity {
ctZones := make(map[binding.Protocol]int)
gatewayIPs := make(map[binding.Protocol]net.IP)
localCIDRs := make(map[binding.Protocol]net.IPNet)
Expand Down Expand Up @@ -101,6 +103,7 @@ func newFeaturePodConnectivity(
ipCtZoneTypeRegMarks: ipCtZoneTypeRegMarks,
ctZoneSrcField: getZoneSrcField(connectUplinkToBridge),
enableMulticast: enableMulticast,
proxyAll: proxyAll,
category: cookie.PodConnectivity,
}
}
Expand Down Expand Up @@ -140,7 +143,7 @@ func (f *featurePodConnectivity) initFlows() []binding.Flow {
flows = append(flows, f.gatewayIPSpoofGuardFlows()...)
flows = append(flows, f.l3FwdFlowToGateway()...)
// Add flow to ensure the liveliness check packet could be forwarded correctly.
flows = append(flows, f.localProbeFlow()...)
flows = append(flows, f.localProbeFlows()...)

if f.networkConfig.TrafficEncapMode.SupportsEncap() {
flows = append(flows, f.tunnelClassifierFlow(config.DefaultTunOFPort))
Expand Down
3 changes: 3 additions & 0 deletions pkg/ovs/openflow/ofctrl_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ func (b *ofFlowBuilder) MatchCTStateSNAT(set bool) FlowBuilder {
}

func (b *ofFlowBuilder) MatchCTMark(marks ...*CtMark) FlowBuilder {
if len(marks) == 0 {
return b
}
var value, mask uint32
for _, mark := range marks {
value |= mark.GetValue()
Expand Down
2 changes: 1 addition & 1 deletion test/integration/agent/openflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,7 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa
tableName: "IngressSecurityClassifier",
flows: []*ofTestUtils.ExpectFlow{
{
MatchStr: fmt.Sprintf("priority=210,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()),
MatchStr: fmt.Sprintf("priority=210,ct_state=-rpl+trk,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()),
ActStr: "goto_table:ConntrackCommit",
},
},
Expand Down

0 comments on commit 5fc6a24

Please sign in to comment.