diff --git a/felix/bpf-gpl/reasons.h b/felix/bpf-gpl/reasons.h index 5b5a05940ac..bd6dc2201c3 100644 --- a/felix/bpf-gpl/reasons.h +++ b/felix/bpf-gpl/reasons.h @@ -22,6 +22,7 @@ enum calico_reason { CALI_REASON_DECAP_FAIL, CALI_REASON_UNAUTH_SOURCE, CALI_REASON_RT_UNKNOWN, + CALI_REASON_BLACK_HOLE, CALI_REASON_ACCEPTED_BY_XDP, // Not used by counters map CALI_REASON_WEP_NOT_READY, CALI_REASON_NATIFACE, diff --git a/felix/bpf-gpl/routes.h b/felix/bpf-gpl/routes.h index 00804899f1b..3ffb87f072b 100644 --- a/felix/bpf-gpl/routes.h +++ b/felix/bpf-gpl/routes.h @@ -30,6 +30,8 @@ enum cali_rt_flags { CALI_RT_SAME_SUBNET = 0x20, CALI_RT_TUNNELED = 0x40, CALI_RT_NO_DSR = 0x80, + CALI_RT_BLACKHOLE_DROP = 0x100, + CALI_RT_BLACKHOLE_REJECT = 0x200, }; struct cali_rt { @@ -76,6 +78,8 @@ static CALI_BPF_INLINE enum cali_rt_flags cali_rt_lookup_flags(ipv46_addr_t *add #define cali_rt_is_host(rt) ((rt)->flags & CALI_RT_HOST) #define cali_rt_is_workload(rt) ((rt)->flags & CALI_RT_WORKLOAD) #define cali_rt_is_tunneled(rt) ((rt)->flags & CALI_RT_TUNNELED) +#define cali_rt_is_blackhole_drop(rt) ((rt)->flags & CALI_RT_BLACKHOLE_DROP) +#define cali_rt_is_blackhole_reject(rt) ((rt)->flags & CALI_RT_BLACKHOLE_REJECT) #define cali_rt_flags_host(t) (((t) & CALI_RT_HOST) == CALI_RT_HOST) #define cali_rt_flags_local_host(t) (((t) & (CALI_RT_LOCAL | CALI_RT_HOST)) == (CALI_RT_LOCAL | CALI_RT_HOST)) diff --git a/felix/bpf-gpl/tc.c b/felix/bpf-gpl/tc.c index 9e13076e2bc..3854b304a18 100644 --- a/felix/bpf-gpl/tc.c +++ b/felix/bpf-gpl/tc.c @@ -428,6 +428,7 @@ static CALI_BPF_INLINE void calico_tc_process_ct_lookup(struct cali_tc_ctx *ctx) * usecase of this is node-local-dns. */ ctx->state->flags |= CALI_ST_SKIP_FIB; + ctx->state->flags |= CALI_ST_NAT_EXCLUDE; } } @@ -541,8 +542,12 @@ static CALI_BPF_INLINE void calico_tc_process_ct_lookup(struct cali_tc_ctx *ctx) ctx->state->flags |= CALI_ST_SRC_IS_HOST; } - struct cali_rt *dest_rt = cali_rt_lookup(&ctx->state->post_nat_ip_dst); - + struct cali_rt *dest_rt = NULL; + // Route lookup is not done for those packets which are nat excluded, where there + // is a nat hit, but we don't resolve (such as node local DNS cache). + if (!(ctx->state->flags & CALI_ST_NAT_EXCLUDE)) { + dest_rt = cali_rt_lookup(&ctx->state->post_nat_ip_dst); + } if (!dest_rt) { CALI_DEBUG("No route for post DNAT dest %x\n", debug_ip(ctx->state->post_nat_ip_dst)); if (CALI_F_FROM_HEP) { @@ -558,6 +563,32 @@ static CALI_BPF_INLINE void calico_tc_process_ct_lookup(struct cali_tc_ctx *ctx) goto do_policy; } + /* If the dest route is a blackhole route, drop/reject the packet. + * This is based on the service loop prevention configuration. + * If ServiceLoopPrevention = Drop, route is a blackhole drop route. + * If ServiceLoopPrevention = Reject, route is a blackhole reject route. + * If ServiceLoopPrevention = Disabled, these routes are not programmed. + */ + if (CALI_F_TO_HOST) { + if (cali_rt_is_blackhole_drop(dest_rt)) { + CALI_DEBUG("Packet hit a black hole route: DROP\n"); + deny_reason(ctx, CALI_REASON_BLACK_HOLE); + goto deny; + } + if (cali_rt_is_blackhole_reject(dest_rt)) { + CALI_DEBUG("Packet hit a black hole route: REJECT\n"); + deny_reason(ctx, CALI_REASON_BLACK_HOLE); +#ifdef IPVER6 + ctx->state->icmp_type = ICMPV6_DEST_UNREACH; + ctx->state->icmp_code = ICMPV6_PORT_UNREACH; +#else + ctx->state->icmp_type = ICMP_DEST_UNREACH; + ctx->state->icmp_code = ICMP_PORT_UNREACH; +#endif + goto icmp_send_reply; + } + } + if (cali_rt_flags_local_host(dest_rt->flags)) { CALI_DEBUG("Post-NAT dest IP is local host.\n"); if (CALI_F_FROM_HEP && is_failsafe_in(ctx->state->ip_proto, ctx->state->post_nat_dport, ctx->state->ip_src)) { diff --git a/felix/bpf-gpl/types.h b/felix/bpf-gpl/types.h index 95caf5a09cb..e1c3b3ec53b 100644 --- a/felix/bpf-gpl/types.h +++ b/felix/bpf-gpl/types.h @@ -142,6 +142,8 @@ enum cali_state_flags { CALI_ST_CT_NP_LOOP = 0x80, /* CALI_ST_CT_NP_REMOTE is set when host is accessing a remote nodeport. */ CALI_ST_CT_NP_REMOTE = 0x100, + /* CALI_ST_NAT_EXCLUDE is set when there is a NAT hit, but we don't want to resolve (such as node local DNS). */ + CALI_ST_NAT_EXCLUDE = 0x200, }; struct fwd { diff --git a/felix/bpf/counters/counters.go b/felix/bpf/counters/counters.go index 0f3e5066fab..d49d1705323 100644 --- a/felix/bpf/counters/counters.go +++ b/felix/bpf/counters/counters.go @@ -72,6 +72,7 @@ const ( DroppedFailedDecap DroppedUnauthSource DroppedUnknownRoute + DroppedBlackholeRoute ) type Description struct { @@ -150,6 +151,10 @@ var descriptions DescList = DescList{ Counter: DroppedUnknownRoute, Category: "Dropped", Caption: "packets with unknown route", }, + { + Counter: DroppedBlackholeRoute, + Category: "Dropped", Caption: "packets hitting blackhole route", + }, } func Descriptions() DescList { diff --git a/felix/bpf/routes/map.go b/felix/bpf/routes/map.go index 106d65fe559..6a9da2e32d1 100644 --- a/felix/bpf/routes/map.go +++ b/felix/bpf/routes/map.go @@ -72,14 +72,16 @@ func (k Key) AsBytes() []byte { type Flags uint32 const ( - FlagInIPAMPool Flags = 0x01 - FlagNATOutgoing Flags = 0x02 - FlagWorkload Flags = 0x04 - FlagLocal Flags = 0x08 - FlagHost Flags = 0x10 - FlagSameSubnet Flags = 0x20 - FlagTunneled Flags = 0x40 - FlagNoDSR Flags = 0x80 + FlagInIPAMPool Flags = 0x01 + FlagNATOutgoing Flags = 0x02 + FlagWorkload Flags = 0x04 + FlagLocal Flags = 0x08 + FlagHost Flags = 0x10 + FlagSameSubnet Flags = 0x20 + FlagTunneled Flags = 0x40 + FlagNoDSR Flags = 0x80 + FlagBlackHoleDrop Flags = 0x100 + FlagBlackHoleReject Flags = 0x200 FlagsUnknown Flags = 0 FlagsRemoteWorkload = FlagWorkload @@ -136,7 +138,7 @@ func (v Value) String() string { if typeFlags&FlagLocal != 0 { parts = append(parts, "local") - } else { + } else if typeFlags&FlagBlackHoleDrop == 0 && typeFlags&FlagBlackHoleReject == 0 { parts = append(parts, "remote") } @@ -174,6 +176,14 @@ func (v Value) String() string { parts = append(parts, "nh", fmt.Sprint(v.NextHop())) } + if typeFlags&FlagBlackHoleDrop != 0 { + parts = append(parts, "blackhole-drop") + } + + if typeFlags&FlagBlackHoleReject != 0 { + parts = append(parts, "blackhole-reject") + } + if len(parts) == 0 { return fmt.Sprintf("unknown type (%d)", typeFlags) } diff --git a/felix/bpf/routes/map6.go b/felix/bpf/routes/map6.go index e6caf21ec90..9303477bc49 100644 --- a/felix/bpf/routes/map6.go +++ b/felix/bpf/routes/map6.go @@ -77,7 +77,7 @@ func (v ValueV6) String() string { if typeFlags&FlagLocal != 0 { parts = append(parts, "local") - } else { + } else if typeFlags&FlagBlackHoleDrop == 0 && typeFlags&FlagBlackHoleReject == 0 { parts = append(parts, "remote") } @@ -107,6 +107,14 @@ func (v ValueV6) String() string { parts = append(parts, "tunneled") } + if typeFlags&FlagBlackHoleDrop != 0 { + parts = append(parts, "blackhole-drop") + } + + if typeFlags&FlagBlackHoleReject != 0 { + parts = append(parts, "blackhole-reject") + } + if typeFlags&FlagLocal != 0 && typeFlags&FlagWorkload != 0 { parts = append(parts, "idx", fmt.Sprint(v.IfaceIndex())) } diff --git a/felix/bpf/ut/bpf_prog_test.go b/felix/bpf/ut/bpf_prog_test.go index 24e09aacfe6..286aefb3ec4 100644 --- a/felix/bpf/ut/bpf_prog_test.go +++ b/felix/bpf/ut/bpf_prog_test.go @@ -717,7 +717,7 @@ func objLoad(fname, bpfFsDir, ipFamily string, topts testOpts, polProg, hasHostC globals.Jumps[i] = uint32(i) } - log.WithField("globals", globals).Debugf("configure program") + log.WithField("globals", globals).Debugf("configure program v6") if err := tc.ConfigureProgramV6(m, ifaceLog, &globals); err != nil { return nil, fmt.Errorf("failed to configure tc program: %w", err) diff --git a/felix/bpf/ut/icmp_port_unreachable_test.go b/felix/bpf/ut/icmp_port_unreachable_test.go index a1f4c25901c..2d995bf69fb 100644 --- a/felix/bpf/ut/icmp_port_unreachable_test.go +++ b/felix/bpf/ut/icmp_port_unreachable_test.go @@ -22,7 +22,9 @@ import ( "github.com/google/gopacket/layers" . "github.com/onsi/gomega" + "github.com/projectcalico/calico/felix/bpf/counters" "github.com/projectcalico/calico/felix/bpf/nat" + "github.com/projectcalico/calico/felix/bpf/routes" tcdefs "github.com/projectcalico/calico/felix/bpf/tc/defs" ) @@ -198,3 +200,101 @@ func checkICMPv6PortUnreachable(pktR gopacket.Packet, ipv6 *layers.IPv6) { gopacket.NewPacket(cpkt.Bytes(), layers.LayerTypeEthernet, gopacket.Default). Layer(layers.LayerTypeICMPv6).(*layers.ICMPv6).Checksum)) } + +func TestSVCLoopPrevention(t *testing.T) { + RegisterTestingT(t) + + iphdr := *ipv4Default + + _, ipv4, _, _, pktBytesV4, err := testPacketV4(nil, &iphdr, nil, nil) + Expect(err).NotTo(HaveOccurred()) + _ = ipv4 + rtKey := routes.NewKey(dstV4CIDR).AsBytes() + rtVal := routes.NewValueWithIfIndex(routes.FlagBlackHoleDrop, 1).AsBytes() + err = rtMap.Update(rtKey, rtVal) + Expect(err).NotTo(HaveOccurred()) + _, ipv6, _, _, pktBytesV6, err := testPacketV6(nil, ipv6Default, nil, nil) + Expect(err).NotTo(HaveOccurred()) + + rtKeyV6 := routes.NewKeyV6(dstV6CIDR).AsBytes() + rtValV6 := routes.NewValueV6WithIfIndex(routes.FlagBlackHoleReject, 1).AsBytes() + err = rtMapV6.Update(rtKeyV6, rtValV6) + Expect(err).NotTo(HaveOccurred()) + + // Insert a reverse route for the source workload. + rtKeyW := routes.NewKey(srcV4CIDR).AsBytes() + rtValW := routes.NewValueWithIfIndex(routes.FlagsLocalWorkload|routes.FlagInIPAMPool, 1).AsBytes() + err = rtMap.Update(rtKeyW, rtValW) + Expect(err).NotTo(HaveOccurred()) + + defer func() { + err := rtMap.Delete(rtKey) + Expect(err).NotTo(HaveOccurred()) + err = rtMapV6.Delete(rtKeyV6) + Expect(err).NotTo(HaveOccurred()) + err = rtMap.Delete(rtKeyW) + Expect(err).NotTo(HaveOccurred()) + }() + + // Test with action = drop + skbMark = 0 + runBpfTest(t, "calico_from_host_ep", nil, func(bpfrun bpfProgRunFn) { + res, err := bpfrun(pktBytesV4) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RetvalStr()).To(Equal("TC_ACT_SHOT"), "expected program to return TC_ACT_SHOT") + + pktR := gopacket.NewPacket(res.dataOut, layers.LayerTypeEthernet, gopacket.Default) + fmt.Printf("pktR = %+v\n", pktR) + bpfCounters, err := counters.Read(countersMap, 1, 0) + Expect(err).NotTo(HaveOccurred()) + Expect(int(bpfCounters[counters.DroppedBlackholeRoute])).To(Equal(1)) + }) + + skbMark = 0 + runBpfTest(t, "calico_from_workload_ep", nil, func(bpfrun bpfProgRunFn) { + res, err := bpfrun(pktBytesV4) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RetvalStr()).To(Equal("TC_ACT_SHOT"), "expected program to return TC_ACT_SHOT") + + pktR := gopacket.NewPacket(res.dataOut, layers.LayerTypeEthernet, gopacket.Default) + fmt.Printf("pktR = %+v\n", pktR) + bpfCounters, err := counters.Read(countersMap, 1, 0) + Expect(err).NotTo(HaveOccurred()) + Expect(int(bpfCounters[counters.DroppedBlackholeRoute])).To(Equal(2)) + }) + + rtVal = routes.NewValueWithIfIndex(routes.FlagBlackHoleReject, 1).AsBytes() + err = rtMap.Update(rtKey, rtVal) + Expect(err).NotTo(HaveOccurred()) + + // Test with action = reject + skbMark = 0 + runBpfTest(t, "calico_from_host_ep", nil, func(bpfrun bpfProgRunFn) { + res, err := bpfrun(pktBytesV4) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RetvalStr()).To(Equal("TC_ACT_UNSPEC"), "expected program to return TC_ACT_UNSPEC") + + pktR := gopacket.NewPacket(res.dataOut, layers.LayerTypeEthernet, gopacket.Default) + fmt.Printf("pktR = %+v\n", pktR) + + checkICMPPortUnreachable(pktR, ipv4) + bpfCounters, err := counters.Read(countersMap, 1, 0) + Expect(err).NotTo(HaveOccurred()) + Expect(int(bpfCounters[counters.DroppedBlackholeRoute])).To(Equal(3)) + }) + + skbMark = 0 + runBpfTest(t, "calico_from_host_ep", nil, func(bpfrun bpfProgRunFn) { + res, err := bpfrun(pktBytesV6) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RetvalStr()).To(Equal("TC_ACT_UNSPEC"), "expected program to return TC_ACT_UNSPEC") + + pktR := gopacket.NewPacket(res.dataOut, layers.LayerTypeEthernet, gopacket.Default) + fmt.Printf("pktR = %+v\n", pktR) + + checkICMPv6PortUnreachable(pktR, ipv6) + bpfCounters, err := counters.Read(countersMap, 1, 0) + Expect(err).NotTo(HaveOccurred()) + Expect(int(bpfCounters[counters.DroppedBlackholeRoute])).To(Equal(4)) + }, withIPv6()) +} diff --git a/felix/dataplane/driver.go b/felix/dataplane/driver.go index a353e966586..258328d6da0 100644 --- a/felix/dataplane/driver.go +++ b/felix/dataplane/driver.go @@ -379,6 +379,7 @@ func StartDataplaneDriver(configParams *config.Config, RouteTableManager: routeTableIndexAllocator, MTUIfacePattern: configParams.MTUIfacePattern, BPFExcludeCIDRsFromNAT: configParams.BPFExcludeCIDRsFromNAT, + ServiceLoopPrevention: configParams.ServiceLoopPrevention, KubeClientSet: k8sClientSet, diff --git a/felix/dataplane/linux/bpf_route_mgr.go b/felix/dataplane/linux/bpf_route_mgr.go index e3ca337a554..4d9e4b3f89b 100644 --- a/felix/dataplane/linux/bpf_route_mgr.go +++ b/felix/dataplane/linux/bpf_route_mgr.go @@ -95,8 +95,10 @@ type bpfRouteManager struct { opReporter logutils.OpRecorder - wgEnabled bool - ipv6Enabled bool + wgEnabled bool + ipv6Enabled bool + blockedCIDRs set.Set[ip.CIDR] + svcLoopPrevention string } func newBPFRouteManager(config *Config, maps *bpfmap.Maps, @@ -105,6 +107,7 @@ func newBPFRouteManager(config *Config, maps *bpfmap.Maps, // Record the external node CIDRs and pre-mark them as dirty. These can only change with a config update, // which would restart Felix. extCIDRs := set.New[ip.CIDR]() + dirtyCIDRs := set.New[ip.CIDR]() for _, cidrStr := range config.ExternalNodesCidrs { if strings.Contains(cidrStr, ":") { @@ -132,6 +135,7 @@ func newBPFRouteManager(config *Config, maps *bpfmap.Maps, log.WithField("cidr", cidrStr).Debug("Ignoring IPv6 DSR optout CIDR") continue } + cidr, err := ip.ParseCIDROrIP(cidrStr) if err != nil { log.WithError(err).WithField("cidr", cidr).Error( @@ -160,6 +164,7 @@ func newBPFRouteManager(config *Config, maps *bpfmap.Maps, externalNodeCIDRs: extCIDRs, dirtyCIDRs: dirtyCIDRs, dsrOptoutCIDRs: noDsrCIDRs, + blockedCIDRs: set.New[ip.CIDR](), desiredRoutes: map[routes.KeyInterface]routes.ValueInterface{}, routeMap: maps.RouteMap, @@ -169,8 +174,9 @@ func newBPFRouteManager(config *Config, maps *bpfmap.Maps, opReporter: opReporter, - wgEnabled: config.Wireguard.Enabled || config.Wireguard.EnabledV6, - ipv6Enabled: config.BPFIpv6Enabled, + wgEnabled: config.Wireguard.Enabled || config.Wireguard.EnabledV6, + ipv6Enabled: config.BPFIpv6Enabled, + svcLoopPrevention: config.ServiceLoopPrevention, } if m.ipv6Enabled { @@ -212,6 +218,8 @@ func (m *bpfRouteManager) OnUpdate(msg interface{}) { m.onWorkloadEndpointUpdate(msg) case *proto.WorkloadEndpointRemove: m.onWorkloadEndpointRemove(msg) + case *proto.GlobalBGPConfigUpdate: + m.onBGPConfigUpdate(msg) } } @@ -306,6 +314,15 @@ func (m *bpfRouteManager) calculateRoute(cidr ip.CIDR) routes.ValueInterface { flags |= routes.FlagNoDSR } + if m.blockedCIDRs.Contains(cidr) { + log.WithField("cidr", cidr).Debug("CIDR is blocked.") + if m.svcLoopPrevention == "Drop" { + flags |= routes.FlagBlackHoleDrop + } else if m.svcLoopPrevention == "Reject" { + flags |= routes.FlagBlackHoleReject + } + } + cgRoute, cgRouteExists := m.cidrToRoute[cidr] if cgRouteExists { // Collect flags that are shared by all route types. @@ -387,14 +404,11 @@ func (m *bpfRouteManager) calculateRoute(cidr ip.CIDR) routes.ValueInterface { // k8s ExternalIP. Route resolver knew that it was assigned to our // hostname. flags |= routes.FlagsLocalHost - fallthrough - default: // proto.RouteType_CIDR_INFO / LOCAL_HOST or no route at all - if flags != 0 { - // We have something to say about this route. - route = m.bpfOps.NewValue(flags) - } } + if route == nil && flags != 0 { + route = m.bpfOps.NewValue(flags) + } return route } @@ -655,6 +669,45 @@ func (m *bpfRouteManager) onWorkloadEndpointRemove(update *proto.WorkloadEndpoin m.removeWEP(update.Id) } +func (m *bpfRouteManager) onBGPConfigUpdate(update *proto.GlobalBGPConfigUpdate) { + blockedCIDRs := []string{} + blockedCIDRs = append(blockedCIDRs, update.GetServiceClusterCidrs()...) + blockedCIDRs = append(blockedCIDRs, update.GetServiceExternalCidrs()...) + blockedCIDRs = append(blockedCIDRs, update.GetServiceLoadbalancerCidrs()...) + + cidrsToDel := set.New[ip.CIDR]() + cidrsToDel.AddSet(m.blockedCIDRs) + m.blockedCIDRs.Clear() + + for _, cidrStr := range blockedCIDRs { + cidr, err := ip.ParseCIDROrIP(cidrStr) + if err != nil { + log.WithError(err).WithField("cidr", cidr).Error( + "Failed to parse cidr.") + } + if !m.ipv6Enabled { + if _, ok := cidr.(ip.V4CIDR); !ok { + continue + } + } + + m.cidrToRoute[cidr] = proto.RouteUpdate{Type: proto.RouteType_CIDR_INFO} + if m.svcLoopPrevention != "Disabled" { + m.dirtyCIDRs.Add(cidr) + } + m.blockedCIDRs.Add(cidr) + cidrsToDel.Discard(cidr) + } + // Delete the unused routes. + cidrsToDel.Iter(func(cidr ip.CIDR) error { + if _, ok := m.cidrToRoute[cidr]; ok { + delete(m.cidrToRoute, cidr) + m.dirtyCIDRs.Add(cidr) + } + return set.RemoveItem + }) +} + func (m *bpfRouteManager) removeWEP(id *proto.WorkloadEndpointID) { oldWEP := m.wepIDToWorklaod[*id] if oldWEP == nil { diff --git a/felix/dataplane/linux/int_dataplane.go b/felix/dataplane/linux/int_dataplane.go index cf0e01933b3..01df7aa2a18 100644 --- a/felix/dataplane/linux/int_dataplane.go +++ b/felix/dataplane/linux/int_dataplane.go @@ -221,6 +221,7 @@ type Config struct { BPFExcludeCIDRsFromNAT []string KubeProxyMinSyncPeriod time.Duration SidecarAccelerationEnabled bool + ServiceLoopPrevention string LookPathOverride func(file string) (string, error) diff --git a/felix/fv/bpf_test.go b/felix/fv/bpf_test.go index 884c9b1111e..5fa6bf58e5f 100644 --- a/felix/fv/bpf_test.go +++ b/felix/fv/bpf_test.go @@ -5448,9 +5448,22 @@ func bpfDumpRoutes(felix *infrastructure.Felix) string { if felix.TopologyOptions.EnableIPv6 { out, err = felix.ExecOutput("calico-bpf", "-6", "routes", "dump") + ExpectWithOffset(1, err).NotTo(HaveOccurred()) } else { out, err = felix.ExecOutput("calico-bpf", "routes", "dump") + ExpectWithOffset(1, err).NotTo(HaveOccurred()) } + return out +} + +func bpfDumpRoutesV4(felix *infrastructure.Felix) string { + out, err := felix.ExecOutput("calico-bpf", "routes", "dump") + Expect(err).NotTo(HaveOccurred()) + return out +} + +func bpfDumpRoutesV6(felix *infrastructure.Felix) string { + out, err := felix.ExecOutput("calico-bpf", "-6", "routes", "dump") Expect(err).NotTo(HaveOccurred()) return out } diff --git a/felix/fv/service_loop_prevention_test.go b/felix/fv/service_loop_prevention_test.go index 83ed1fa96c6..405ef6e66b5 100644 --- a/felix/fv/service_loop_prevention_test.go +++ b/felix/fv/service_loop_prevention_test.go @@ -34,7 +34,7 @@ import ( "github.com/projectcalico/calico/libcalico-go/lib/options" ) -var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes", []apiconfig.DatastoreType{apiconfig.EtcdV3, apiconfig.Kubernetes}, func(getInfra infrastructure.InfraFactory) { +var _ = infrastructure.DatastoreDescribe("_BPF-SAFE_ service loop prevention; with 2 nodes", []apiconfig.DatastoreType{apiconfig.EtcdV3, apiconfig.Kubernetes}, func(getInfra infrastructure.InfraFactory) { var ( infra infrastructure.DatastoreInfra @@ -48,6 +48,9 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" options := infrastructure.DefaultTopologyOptions() options.IPIPEnabled = false tc, client = infrastructure.StartNNodeTopology(2, options, infra) + if BPFMode() { + ensureAllNodesBPFProgramsAttached(tc.Felixes) + } }) AfterEach(func() { @@ -66,24 +69,6 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" infra.Stop() }) - updateFelixConfig := func(deltaFn func(*api.FelixConfiguration)) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - cfg, err := client.FelixConfigurations().Get(ctx, "default", options.GetOptions{}) - if _, doesNotExist := err.(errors.ErrorResourceDoesNotExist); doesNotExist { - cfg = api.NewFelixConfiguration() - cfg.Name = "default" - deltaFn(cfg) - _, err = client.FelixConfigurations().Create(ctx, cfg, options.SetOptions{}) - Expect(err).NotTo(HaveOccurred()) - } else { - Expect(err).NotTo(HaveOccurred()) - deltaFn(cfg) - _, err = client.FelixConfigurations().Update(ctx, cfg, options.SetOptions{}) - Expect(err).NotTo(HaveOccurred()) - } - } - updateBGPConfig := func(deltaFn func(*api.BGPConfiguration)) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -116,7 +101,7 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" } } - tryRoutingLoop := func(expectLoop bool) { + tryRoutingLoop := func(expectLoop bool, count int) { // Run containers to model a default gateway, and an external client connecting to // services within the cluster via that gateway. @@ -166,14 +151,15 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" // gateway, resulting in MORE THAN 2 packets. Eventually(countServiceIPPackets).Should(BeNumerically(">", 2)) } else { - // Tcpdump should see just 1 packet, the request, with no response (because + // Tcpdump should see just 1 or 2 packets based on the ServiceLoopPrevention config. + // If set to Drop, we see 1 packet the request, with no response (because // we DROP) and no looping. - Eventually(countServiceIPPackets).Should(BeNumerically("==", 1)) + // If set to Reject, we see 2 packets, the request, ICMP unreachable. + Eventually(countServiceIPPackets).Should(BeNumerically("==", count)) } } - It("programs iptables as expected to block service routing loops", func() { - + It("programs dataplane as expected to block service routing loops", func() { By("configuring service cluster IPs") updateBGPConfig(func(cfg *api.BGPConfiguration) { cfg.Spec.ServiceClusterIPs = []api.ServiceClusterIPBlock{ @@ -190,62 +176,84 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" // chains with DROP. (Felix handles BGPConfiguration without restarting, so this // should be quick.) for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save")).Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j DROP"), - )) - Eventually(getCIDRBlockRules(felix, "ip6tables-save")).Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d fd5f::/119 .* -j DROP"), - )) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").Should(ContainSubstring("10.96.0.0/17: blackhole-drop")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save")).Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j DROP"), + )) + Eventually(getCIDRBlockRules(felix, "ip6tables-save")).Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d fd5f::/119 .* -j DROP"), + )) + } } By("test that we don't get a routing loop") - tryRoutingLoop(false) + tryRoutingLoop(false, 1) By("configuring ServiceLoopPrevention=Reject") - updateFelixConfig(func(cfg *api.FelixConfiguration) { - cfg.Spec.ServiceLoopPrevention = "Reject" - }) + setSvcLoopPrevention(tc, "Reject") // Expect to see rules in cali-cidr-block chains with REJECT. (Allowing time for a // Felix restart.) for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j REJECT"), - )) - Eventually(getCIDRBlockRules(felix, "ip6tables-save"), "8s", "0.5s").Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d fd5f::/119 .* -j REJECT"), - )) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").Should(ContainSubstring("10.96.0.0/17: blackhole-reject")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j REJECT"), + )) + Eventually(getCIDRBlockRules(felix, "ip6tables-save"), "8s", "0.5s").Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d fd5f::/119 .* -j REJECT"), + )) + } + } + By("test that we don't get a routing loop and ICMP back to the sender") + if BPFMode() { + tryRoutingLoop(false, 2) } By("configuring ServiceLoopPrevention=Disabled") - updateFelixConfig(func(cfg *api.FelixConfiguration) { - cfg.Spec.ServiceLoopPrevention = "Disabled" - }) + setSvcLoopPrevention(tc, "Disabled") // Expect to see empty cali-cidr-block chains. (Allowing time for a Felix restart.) for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(BeEmpty()) - Eventually(getCIDRBlockRules(felix, "ip6tables-save"), "8s", "0.5s").Should(BeEmpty()) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").ShouldNot(ContainSubstring("10.96.0.0/17: blackhole")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(BeEmpty()) + Eventually(getCIDRBlockRules(felix, "ip6tables-save"), "8s", "0.5s").Should(BeEmpty()) + } } By("test that we DO get a routing loop") // (In order to test that the tryRoutingLoop setup is genuine.) - tryRoutingLoop(true) + tryRoutingLoop(true, 64) By("configuring ServiceLoopPrevention=Drop") - updateFelixConfig(func(cfg *api.FelixConfiguration) { - cfg.Spec.ServiceLoopPrevention = "Drop" - }) + setSvcLoopPrevention(tc, "Drop") // Expect to see rules in cali-cidr-block chains with DROP. (Allowing time for a // Felix restart.) for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j DROP"), - )) - Eventually(getCIDRBlockRules(felix, "ip6tables-save"), "8s", "0.5s").Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d fd5f::/119 .* -j DROP"), - )) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").Should(ContainSubstring("10.96.0.0/17: blackhole-drop")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j DROP"), + )) + Eventually(getCIDRBlockRules(felix, "ip6tables-save"), "8s", "0.5s").Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d fd5f::/119 .* -j DROP"), + )) + } } By("updating the service CIDRs") @@ -263,12 +271,18 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" // Expect to see rules in cali-cidr-block chains with DROP and the updated CIDRs. // (BGPConfiguration change is handled without needing a restart.) for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save")).Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d 1\\.1\\.0\\.0/16 .* -j DROP"), - )) - Eventually(getCIDRBlockRules(felix, "ip6tables-save")).Should(ConsistOf( - MatchRegexp("-A cali-cidr-block -d fd5e::/119 .* -j DROP"), - )) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").Should(ContainSubstring("1.1.0.0/16: blackhole-drop")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save")).Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d 1\\.1\\.0\\.0/16 .* -j DROP"), + )) + Eventually(getCIDRBlockRules(felix, "ip6tables-save")).Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d fd5e::/119 .* -j DROP"), + )) + } } By("resetting BGP config") @@ -287,24 +301,40 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" } }) + for _, felix := range tc.Felixes { + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").Should(ContainSubstring("10.96.0.0/17: blackhole-drop")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save")).Should(ConsistOf( + MatchRegexp("-A cali-cidr-block -d 10\\.96\\.0\\.0/17 .* -j DROP"), + )) + } + } + By("test that we don't get a routing loop") - tryRoutingLoop(false) + tryRoutingLoop(false, 1) By("configuring ServiceLoopPrevention=Disabled") - updateFelixConfig(func(cfg *api.FelixConfiguration) { - cfg.Spec.ServiceLoopPrevention = "Disabled" - }) + setSvcLoopPrevention(tc, "Disabled") // Expect to see empty cali-cidr-block chains. (Allowing time for a Felix // restart.) This ensures that the cali-cidr-block chain has been cleared // before we try a test ping. for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(BeEmpty()) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").ShouldNot(ContainSubstring("10.96.0.0/17: blackhole")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(BeEmpty()) + } } By("test that we DO get a routing loop") // (In order to test that the tryRoutingLoop setup is genuine.) - tryRoutingLoop(true) + tryRoutingLoop(true, 64) By("resetting BGP config") updateBGPConfig(func(cfg *api.BGPConfiguration) { @@ -323,23 +353,27 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" }) By("test that we don't get a routing loop") - tryRoutingLoop(false) + tryRoutingLoop(false, 1) By("configuring ServiceLoopPrevention=Disabled") - updateFelixConfig(func(cfg *api.FelixConfiguration) { - cfg.Spec.ServiceLoopPrevention = "Disabled" - }) + setSvcLoopPrevention(tc, "Disabled") // Expect to see empty cali-cidr-block chains. (Allowing time for a Felix // restart.) This ensures that the cali-cidr-block chain has been cleared // before we try a test ping. for _, felix := range tc.Felixes { - Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(BeEmpty()) + if BPFMode() { + Eventually(func() string { + return bpfDumpRoutesV4(felix) + }, "10s", "1s").ShouldNot(ContainSubstring("10.96.0.0/17: blackhole")) + } else { + Eventually(getCIDRBlockRules(felix, "iptables-save"), "8s", "0.5s").Should(BeEmpty()) + } } By("test that we DO get a routing loop") // (In order to test that the tryRoutingLoop setup is genuine.) - tryRoutingLoop(true) + tryRoutingLoop(true, 64) By("resetting BGP config") updateBGPConfig(func(cfg *api.BGPConfiguration) { @@ -348,3 +382,11 @@ var _ = infrastructure.DatastoreDescribe("service loop prevention; with 2 nodes" }) }) + +func setSvcLoopPrevention(tc infrastructure.TopologyContainers, value string) { + for _, felix := range tc.Felixes { + felix.SetEnv(map[string]string{"FELIX_SERVICELOOPPREVENTION": value}) + felix.SetEnv(map[string]string{"FELIX_SERVICELOOPPREVENTION": value}) + felix.Restart() + } +}