From ea7024191532872c198a8b6bacd9a20671600d8a Mon Sep 17 00:00:00 2001 From: trevor tao Date: Mon, 5 Sep 2022 16:39:59 +0800 Subject: [PATCH] Let route unknown traffic skip the eBPF processing When we enable Calico eBPF dataplane, and a packet(e.g, a ping ICMP packet) destined for a VM of the host(VMs are ususually connected to host's physical interface through the macvtap/macvlan interface in either Bridge, VEPA or passthrough mode) from the physical interface, would be falsely bypassed by the eBPF program here and can't reach the target VM from the virtual interface (macvtap/macvlan). When a packet comes into the eBPF program of traffic control, its destination address(daddr) should be checked if it's for a known route by checking the route map, and if it's for an unknown route, it should be thought as it's not destined for this system, so we should just let it go through(skip) all our eBPF programs processing here by setting the action to TC_ACT_OK, which would skip for subsequent eBPF checkings and processings. So here we also should not check the unknown route traffic against FIB by bpf_fib_lookup (in forward_or_drop()), since in some systems, the lookup result would be successful like this: -0 [088] d.s. 1810775.267240: bpf_trace_printk: enp9s0---I: Traffic is towards the host namespace, doing Linux FIB lookup -0 [088] d.s. 1810775.267243: bpf_trace_printk: enp9s0---I: FIB lookup succeeded - with neigh -0 [088] d.s. 1810775.267244: bpf_trace_printk: enp9s0---I: Got Linux FIB hit, redirecting to iface 2. -0 [088] d.s. 1810775.267245: bpf_trace_printk: enp9s0---I: Traffic is towards host namespace, marking with 0x3000000. -0 [088] d.s. 1810775.267247: bpf_trace_printk: enp9s0---I: Final result=ALLOW (0). Program execution time: 31307ns -0 [088] d.s. 1810775.267249: bpf_trace_printk: enp9s0---E: New packet at ifindex=2; mark=3000000 -0 [088] d.s. 1810775.267250: bpf_trace_printk: enp9s0---E: Final result=ALLOW (3). Bypass mark bit set. and it's a wrong processing here since for the packet of a mark of 3000000 at the egress direction would be discarded by the system. On the other side, we also noticed in some systems, the issue of VM access blocking seems to be disappeared, and the packet can go through the eBPF program and finally reach the target VM. In this case, it does not mean the original action is correct, but just because the FIB lookup just fails here(see the log below), so the packet would be bypass by the eBPF program here with a mark 0x1000000: -0 [014] ..s. 17619198.981285: 0: eno1np0--I: Traffic is towards the host namespace, doing Linux FIB lookup -0 [014] ..s. 17619198.981287: 0: eno1np0--I: FIB lookup failed (FIB problem): 7. -0 [014] ..s. 17619198.981287: 0: eno1np0--I: Traffic is towards host namespace, marking with 0x1000000. -0 [014] ..s. 17619198.981288: 0: eno1np0--I: Final result=ALLOW (0). Program execution time: 16040ns So it can correctly skip the wrong marking action above. At the same time, we would like to say there is a similar processing for the unrelevant traffic in Cilium eBPF implementation: ep = lookup_ip4_endpoint(ip4); https://github.com/cilium/cilium/blob/master/bpf/bpf_host.c#L571 and if (!from_host) return CTX_ACT_OK; https://github.com/cilium/cilium/blob/master/bpf/bpf_host.c#L586 Here the endpoint of Cilium eBPF is similar to the route of Calico eBPF. This patch is also a fix for the issue of "VM access was blocked when eBPF dataplane used" https://github.com/projectcalico/calico/issues/6450 Signed-off-by: trevor tao --- felix/bpf-gpl/fib.h | 14 ++++++++++++++ felix/bpf-gpl/parsing.h | 21 +++++++++++++++++++++ felix/bpf-gpl/routes.h | 6 ++++++ felix/bpf-gpl/tc.c | 5 +++++ 4 files changed, 46 insertions(+) diff --git a/felix/bpf-gpl/fib.h b/felix/bpf-gpl/fib.h index f19327101c3..a7138e9179e 100644 --- a/felix/bpf-gpl/fib.h +++ b/felix/bpf-gpl/fib.h @@ -56,6 +56,11 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx) goto deny; } + if (rc == TC_ACT_OK) { + CALI_DEBUG("See TC_ACT_OK\n"); + goto ok; + } + if (rc == CALI_RES_REDIR_BACK) { int redir_flags = 0; if (CALI_F_FROM_HOST) { @@ -272,6 +277,15 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx) return rc; +ok: + if (CALI_LOG_LEVEL >= CALI_LOG_LEVEL_INFO) { + __u64 prog_end_time = bpf_ktime_get_ns(); + CALI_INFO("Final result=OK (%x). Program execution time: %lluns\n", + reason, prog_end_time-state->prog_start_time); + } + + return TC_ACT_OK; + deny: if (CALI_LOG_LEVEL >= CALI_LOG_LEVEL_INFO) { __u64 prog_end_time = bpf_ktime_get_ns(); diff --git a/felix/bpf-gpl/parsing.h b/felix/bpf-gpl/parsing.h index 4bd06895047..c72162ed4b3 100644 --- a/felix/bpf-gpl/parsing.h +++ b/felix/bpf-gpl/parsing.h @@ -8,6 +8,7 @@ #define PARSING_OK 0 #define PARSING_OK_V6 1 #define PARSING_ALLOW_WITHOUT_ENFORCING_POLICY 2 +#define PARSING_ALLOW_WITHOUT_ROUTE 3 #define PARSING_ERROR -1 static CALI_BPF_INLINE int parse_packet_ip(struct cali_tc_ctx *ctx) { @@ -63,6 +64,23 @@ static CALI_BPF_INLINE int parse_packet_ip(struct cali_tc_ctx *ctx) { } } + // For ingress packet, check if it has local route matched, for unknown packet, + // just allow it with CALI_REASON_RT_UNKNOWN + if (CALI_F_FROM_HEP) { + if (skb_refresh_validate_ptrs(ctx, UDP_SIZE)) { + ctx->fwd.reason = CALI_REASON_SHORT; + CALI_DEBUG("Too short\n"); + goto deny; + } + CALI_DEBUG("check if dst addr route is known\n"); + if (rt_addr_is_unknown(ip_hdr(ctx)->daddr)) { + ctx->fwd.reason = CALI_REASON_RT_UNKNOWN; + ctx->state->flags |= CALI_ST_SKIP_FIB; + CALI_DEBUG("No route found\n"); + goto allow_no_route; + } + } + // In TC programs, parse packet and validate its size. This is // already done for XDP programs at the beginning of the function. if (!CALI_F_XDP) { @@ -102,6 +120,9 @@ static CALI_BPF_INLINE int parse_packet_ip(struct cali_tc_ctx *ctx) { allow_no_fib: return PARSING_ALLOW_WITHOUT_ENFORCING_POLICY; +allow_no_route: + return PARSING_ALLOW_WITHOUT_ROUTE; + deny: return PARSING_ERROR; } diff --git a/felix/bpf-gpl/routes.h b/felix/bpf-gpl/routes.h index 12d7a765938..28052b51bba 100644 --- a/felix/bpf-gpl/routes.h +++ b/felix/bpf-gpl/routes.h @@ -71,6 +71,7 @@ static CALI_BPF_INLINE enum cali_rt_flags cali_rt_lookup_flags(__be32 addr) #define cali_rt_flags_local_workload(t) (((t) & CALI_RT_LOCAL) && ((t) & CALI_RT_WORKLOAD)) #define cali_rt_flags_remote_workload(t) (!((t) & CALI_RT_LOCAL) && ((t) & CALI_RT_WORKLOAD)) #define cali_rt_flags_remote_host(t) (((t) & (CALI_RT_LOCAL | CALI_RT_HOST)) == CALI_RT_HOST) +#define cali_rt_flags_unknown(t) ((t) == CALI_RT_UNKNOWN) static CALI_BPF_INLINE bool rt_addr_is_local_host(__be32 addr) { @@ -82,4 +83,9 @@ static CALI_BPF_INLINE bool rt_addr_is_remote_host(__be32 addr) return cali_rt_flags_remote_host(cali_rt_lookup_flags(addr)); } +static CALI_BPF_INLINE bool rt_addr_is_unknown(__be32 addr) +{ + return cali_rt_flags_unknown(cali_rt_lookup_flags(addr)); +} + #endif /* __CALI_ROUTES_H__ */ diff --git a/felix/bpf-gpl/tc.c b/felix/bpf-gpl/tc.c index 864e53523e2..59084fa10eb 100644 --- a/felix/bpf-gpl/tc.c +++ b/felix/bpf-gpl/tc.c @@ -172,6 +172,11 @@ static CALI_BPF_INLINE int calico_tc(struct __sk_buff *skb) fwd_fib_set(&ctx.fwd, false); ctx.fwd.res = TC_ACT_UNSPEC; goto finalize; + case PARSING_ALLOW_WITHOUT_ROUTE: + // A packet that we automatically let through + fwd_fib_set(&ctx.fwd, false); + ctx.fwd.res = TC_ACT_OK; + goto finalize; case PARSING_ERROR: default: // A malformed packet or a packet we don't support