Skip to content

Commit

Permalink
Let route unknown traffic skip the eBPF processing
Browse files Browse the repository at this point in the history
When we enable Calico eBPF dataplane, and a packet(e.g, a ping ICMP packet) destined
for a VM of the host(VMs are ususually connected to host's physical interface through
the macvtap/macvlan interface in either Bridge, VEPA or passthrough mode)
from the physical interface, would be falsely bypassed
by the eBPF program here and can't reach the target VM from the virtual interface
(macvtap/macvlan).

When a packet comes into the eBPF program of traffic control, its
destination address(daddr) should be checked if it's for a known route
by checking the route map, and if it's for an unknown route, it should
be thought as it's not destined for this system, so we should just let
it go through(skip) all our eBPF programs processing here by setting the
action to TC_ACT_OK, which would skip for subsequent eBPF checkings
and processings.

So here we also should not check the unknown route traffic against FIB by bpf_fib_lookup
(in forward_or_drop()), since in some systems, the lookup result would be successful
like this:
  <idle>-0       [088] d.s. 1810775.267240: bpf_trace_printk: enp9s0---I: Traffic is towards the host namespace, doing Linux FIB lookup
  <idle>-0       [088] d.s. 1810775.267243: bpf_trace_printk: enp9s0---I: FIB lookup succeeded - with neigh
  <idle>-0       [088] d.s. 1810775.267244: bpf_trace_printk: enp9s0---I: Got Linux FIB hit, redirecting to iface 2.
  <idle>-0       [088] d.s. 1810775.267245: bpf_trace_printk: enp9s0---I: Traffic is towards host namespace, marking with 0x3000000.
  <idle>-0       [088] d.s. 1810775.267247: bpf_trace_printk: enp9s0---I: Final result=ALLOW (0). Program execution time: 31307ns
  <idle>-0       [088] d.s. 1810775.267249: bpf_trace_printk: enp9s0---E: New packet at ifindex=2; mark=3000000
  <idle>-0       [088] d.s. 1810775.267250: bpf_trace_printk: enp9s0---E: Final result=ALLOW (3). Bypass mark bit set.

and it's a wrong processing here since for the packet of a mark of 3000000 at
the egress direction would be discarded by the system.

On the other side, we also noticed in some systems, the issue of VM
access blocking seems to be disappeared, and the packet can go through the
eBPF program and finally reach the target VM. In this case, it does not mean the
original action is correct, but just because the FIB lookup just fails here(see the log
below), so the packet would be bypass by the eBPF program here with a mark 0x1000000:
  <idle>-0       [014] ..s. 17619198.981285: 0: eno1np0--I: Traffic is towards the host namespace, doing Linux FIB lookup
  <idle>-0       [014] ..s. 17619198.981287: 0: eno1np0--I: FIB lookup failed (FIB problem): 7.
  <idle>-0       [014] ..s. 17619198.981287: 0: eno1np0--I: Traffic is towards host namespace, marking with 0x1000000.
  <idle>-0       [014] ..s. 17619198.981288: 0: eno1np0--I: Final result=ALLOW (0). Program execution time: 16040ns
So it can correctly skip the wrong marking action above.

At the same time, we would like to say there is a similar processing for
the unrelevant traffic in Cilium eBPF implementation:
        ep = lookup_ip4_endpoint(ip4);
https://github.com/cilium/cilium/blob/master/bpf/bpf_host.c#L571

and
        if (!from_host)
                return CTX_ACT_OK;
https://github.com/cilium/cilium/blob/master/bpf/bpf_host.c#L586

Here the endpoint of Cilium eBPF is similar to the route of Calico eBPF.

This patch is also a fix for the issue of
"VM access was blocked when eBPF dataplane used"
projectcalico#6450

Signed-off-by: trevor tao <[email protected]>
  • Loading branch information
TrevorTaoARM committed Oct 3, 2022
1 parent c4f28c9 commit ea70241
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 0 deletions.
14 changes: 14 additions & 0 deletions felix/bpf-gpl/fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)
goto deny;
}

if (rc == TC_ACT_OK) {
CALI_DEBUG("See TC_ACT_OK\n");
goto ok;
}

if (rc == CALI_RES_REDIR_BACK) {
int redir_flags = 0;
if (CALI_F_FROM_HOST) {
Expand Down Expand Up @@ -272,6 +277,15 @@ static CALI_BPF_INLINE int forward_or_drop(struct cali_tc_ctx *ctx)

return rc;

ok:
if (CALI_LOG_LEVEL >= CALI_LOG_LEVEL_INFO) {
__u64 prog_end_time = bpf_ktime_get_ns();
CALI_INFO("Final result=OK (%x). Program execution time: %lluns\n",
reason, prog_end_time-state->prog_start_time);
}

return TC_ACT_OK;

deny:
if (CALI_LOG_LEVEL >= CALI_LOG_LEVEL_INFO) {
__u64 prog_end_time = bpf_ktime_get_ns();
Expand Down
21 changes: 21 additions & 0 deletions felix/bpf-gpl/parsing.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define PARSING_OK 0
#define PARSING_OK_V6 1
#define PARSING_ALLOW_WITHOUT_ENFORCING_POLICY 2
#define PARSING_ALLOW_WITHOUT_ROUTE 3
#define PARSING_ERROR -1

static CALI_BPF_INLINE int parse_packet_ip(struct cali_tc_ctx *ctx) {
Expand Down Expand Up @@ -63,6 +64,23 @@ static CALI_BPF_INLINE int parse_packet_ip(struct cali_tc_ctx *ctx) {
}
}

// For ingress packet, check if it has local route matched, for unknown packet,
// just allow it with CALI_REASON_RT_UNKNOWN
if (CALI_F_FROM_HEP) {
if (skb_refresh_validate_ptrs(ctx, UDP_SIZE)) {
ctx->fwd.reason = CALI_REASON_SHORT;
CALI_DEBUG("Too short\n");
goto deny;
}
CALI_DEBUG("check if dst addr route is known\n");
if (rt_addr_is_unknown(ip_hdr(ctx)->daddr)) {
ctx->fwd.reason = CALI_REASON_RT_UNKNOWN;
ctx->state->flags |= CALI_ST_SKIP_FIB;
CALI_DEBUG("No route found\n");
goto allow_no_route;
}
}

// In TC programs, parse packet and validate its size. This is
// already done for XDP programs at the beginning of the function.
if (!CALI_F_XDP) {
Expand Down Expand Up @@ -102,6 +120,9 @@ static CALI_BPF_INLINE int parse_packet_ip(struct cali_tc_ctx *ctx) {
allow_no_fib:
return PARSING_ALLOW_WITHOUT_ENFORCING_POLICY;

allow_no_route:
return PARSING_ALLOW_WITHOUT_ROUTE;

deny:
return PARSING_ERROR;
}
Expand Down
6 changes: 6 additions & 0 deletions felix/bpf-gpl/routes.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ static CALI_BPF_INLINE enum cali_rt_flags cali_rt_lookup_flags(__be32 addr)
#define cali_rt_flags_local_workload(t) (((t) & CALI_RT_LOCAL) && ((t) & CALI_RT_WORKLOAD))
#define cali_rt_flags_remote_workload(t) (!((t) & CALI_RT_LOCAL) && ((t) & CALI_RT_WORKLOAD))
#define cali_rt_flags_remote_host(t) (((t) & (CALI_RT_LOCAL | CALI_RT_HOST)) == CALI_RT_HOST)
#define cali_rt_flags_unknown(t) ((t) == CALI_RT_UNKNOWN)

static CALI_BPF_INLINE bool rt_addr_is_local_host(__be32 addr)
{
Expand All @@ -82,4 +83,9 @@ static CALI_BPF_INLINE bool rt_addr_is_remote_host(__be32 addr)
return cali_rt_flags_remote_host(cali_rt_lookup_flags(addr));
}

static CALI_BPF_INLINE bool rt_addr_is_unknown(__be32 addr)
{
return cali_rt_flags_unknown(cali_rt_lookup_flags(addr));
}

#endif /* __CALI_ROUTES_H__ */
5 changes: 5 additions & 0 deletions felix/bpf-gpl/tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ static CALI_BPF_INLINE int calico_tc(struct __sk_buff *skb)
fwd_fib_set(&ctx.fwd, false);
ctx.fwd.res = TC_ACT_UNSPEC;
goto finalize;
case PARSING_ALLOW_WITHOUT_ROUTE:
// A packet that we automatically let through
fwd_fib_set(&ctx.fwd, false);
ctx.fwd.res = TC_ACT_OK;
goto finalize;
case PARSING_ERROR:
default:
// A malformed packet or a packet we don't support
Expand Down

0 comments on commit ea70241

Please sign in to comment.