diff --git a/control/control_plane_core.go b/control/control_plane_core.go index e4b30eda40..d8a5385186 100644 --- a/control/control_plane_core.go +++ b/control/control_plane_core.go @@ -471,38 +471,6 @@ func (c *controlPlaneCore) _bindLan(ifname string) error { } return nil }) - - // Insert filters. - filterEgress := &netlink.BpfFilter{ - FilterAttrs: netlink.FilterAttrs{ - LinkIndex: link.Attrs().Index, - Parent: netlink.HANDLE_MIN_EGRESS, - Handle: netlink.MakeHandle(0x2023, 0b010+uint16(c.flip)), - Protocol: unix.ETH_P_ALL, - // Priority should be front of WAN's - Priority: 1, - }, - Fd: c.bpf.bpfPrograms.TproxyLanEgress.FD(), - Name: consts.AppName + "_lan_egress", - DirectAction: true, - } - // Remove and add. - _ = netlink.FilterDel(filterEgress) - if !c.isReload { - // Clean up thoroughly. - filterEgressFlipped := deepcopy.Copy(filterEgress).(*netlink.BpfFilter) - filterEgressFlipped.FilterAttrs.Handle ^= 1 - _ = netlink.FilterDel(filterEgressFlipped) - } - if err := netlink.FilterAdd(filterEgress); err != nil { - return fmt.Errorf("cannot attach ebpf object to filter egress: %w", err) - } - c.deferFuncs = append(c.deferFuncs, func() error { - if err := netlink.FilterDel(filterEgress); err != nil { - return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterEgress.Name, err) - } - return nil - }) return nil } diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index d1025b5beb..216114a0b8 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -399,159 +399,6 @@ static __always_inline bool equal16(const __be32 x[4], const __be32 y[4]) { #endif } -static __always_inline __u32 l4_checksum_rel_off(__u8 proto) { - switch (proto) { - case IPPROTO_TCP: - return offsetof(struct tcphdr, check); - - case IPPROTO_UDP: - return offsetof(struct udphdr, check); - } - return 0; -} - -static __always_inline __u32 l4_checksum_off(__u32 link_h_len, __u8 proto, - __u8 ihl) { - return link_h_len + ihl * 4 + l4_checksum_rel_off(proto); -} - -static 
__always_inline int disable_l4_checksum(struct __sk_buff *skb, - __u32 link_h_len, __u8 l4proto, - __u8 ihl) { - __u32 l4_cksm_off = l4_checksum_off(link_h_len, l4proto, ihl); - // Set checksum zero. - __sum16 bak_cksm = 0; - return bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0); -} - -static __always_inline int rewrite_ip(struct __sk_buff *skb, __u32 link_h_len, - __u8 proto, __u8 ihl, __be32 old_ip[4], - __be32 new_ip[4], bool is_dest, - bool disable_l4_checksum) { - // Nothing to do. - if (equal16(old_ip, new_ip)) { - return 0; - } - // bpf_printk("%pI6->%pI6", old_ip, new_ip); - - __u32 l4_cksm_off = l4_checksum_off(link_h_len, proto, ihl); - int ret; - // BPF_F_PSEUDO_HDR indicates the part we want to modify is part of the - // pseudo header. - __u32 l4flags = BPF_F_PSEUDO_HDR; - if (proto == IPPROTO_UDP) { - l4flags |= BPF_F_MARK_MANGLED_0; - } - - if (skb->protocol == bpf_htons(ETH_P_IP)) { - - __be32 _old_ip = old_ip[3]; - __be32 _new_ip = new_ip[3]; - - int ret; - - if (!disable_l4_checksum) { - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip, - l4flags | sizeof(_new_ip)))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; - } - } - - if ((ret = bpf_l3_csum_replace(skb, IPV4_CSUM_OFF(link_h_len), _old_ip, - _new_ip, sizeof(_new_ip)))) { - return ret; - } - // bpf_printk("%pI4 -> %pI4", &_old_ip, &_new_ip); - - ret = bpf_skb_store_bytes( - skb, is_dest ? IPV4_DST_OFF(link_h_len) : IPV4_SRC_OFF(link_h_len), - &_new_ip, sizeof(_new_ip), 0); - if (ret) { - bpf_printk("bpf_skb_store_bytes: %d", ret); - return ret; - } - } else { - - if (!disable_l4_checksum) { - __s64 cksm = - bpf_csum_diff(old_ip, IPV6_BYTE_LENGTH, new_ip, IPV6_BYTE_LENGTH, 0); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; - } - } - - // bpf_printk("%pI6 -> %pI6", old_ip, new_ip); - - ret = bpf_skb_store_bytes( - skb, is_dest ? 
IPV6_DST_OFF(link_h_len) : IPV6_SRC_OFF(link_h_len), - new_ip, IPV6_BYTE_LENGTH, 0); - if (ret) { - bpf_printk("bpf_skb_store_bytes: %d", ret); - return ret; - } - } - - return 0; -} - -static __always_inline int rewrite_port(struct __sk_buff *skb, __u32 link_h_len, - __u8 proto, __u8 ihl, __be16 old_port, - __be16 new_port, bool is_dest, - bool disable_l4_checksum) { - // Nothing to do. - if (old_port == new_port) { - return 0; - } - - __u32 cksm_off = l4_checksum_off(link_h_len, proto, ihl), - port_off = link_h_len + ihl * 4; - if (!cksm_off) { - return -EINVAL; - } - __u32 l4flags = 0; - switch (proto) { - case IPPROTO_TCP: - if (is_dest) { - port_off += offsetof(struct tcphdr, dest); - } else { - port_off += offsetof(struct tcphdr, source); - } - break; - - case IPPROTO_UDP: - if (is_dest) { - port_off += offsetof(struct udphdr, dest); - } else { - port_off += offsetof(struct udphdr, source); - } - l4flags |= BPF_F_MARK_MANGLED_0; - break; - - default: - return -EINVAL; - } - - // bpf_printk("%u -> %u", bpf_ntohs(old_port), bpf_ntohs(new_port)); - - int ret; - - if (!disable_l4_checksum) { - if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port, - l4flags | sizeof(new_port)))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; - } - } - - if ((ret = bpf_skb_store_bytes(skb, port_off, &new_port, sizeof(new_port), - 0))) { - return ret; - } - return 0; -} - static __always_inline int handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, struct icmp6hdr *icmp6h, struct tcphdr *tcph, @@ -715,176 +562,6 @@ parse_transport(const struct __sk_buff *skb, __u32 link_h_len, } } -static __always_inline int adjust_udp_len(struct __sk_buff *skb, - __u32 link_h_len, __u16 oldlen, - __u32 ihl, __u16 len_diff, - bool disable_l4_checksum) { - if (unlikely(!len_diff)) { - return 0; - } - - // Boundary check. 
- if (len_diff > 0) { - if (unlikely(bpf_ntohs(oldlen) + len_diff < len_diff)) { // overflow - bpf_printk("udp length overflow"); - return -EINVAL; - } - } else { - if (unlikely((__s32)bpf_ntohs(oldlen) + len_diff < 0)) { // not enough - bpf_printk("udp length not enough"); - return -EINVAL; - } - } - __be16 newlen = bpf_htons(bpf_ntohs(oldlen) + len_diff); - - // Calculate checksum and store the new value. - int ret; - __u32 udp_csum_off = l4_checksum_off(link_h_len, IPPROTO_UDP, ihl); - - if (!disable_l4_checksum) { // replace twice because len exists both pseudo - // hdr and hdr. - if ((ret = bpf_l4_csum_replace( - skb, udp_csum_off, oldlen, newlen, - sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); - return ret; - } - if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen, - sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); - return ret; - } - } - - if ((ret = bpf_skb_store_bytes( - skb, link_h_len + ihl * 4 + offsetof(struct udphdr, len), &newlen, - sizeof(oldlen), 0))) { - bpf_printk("bpf_skb_store_bytes newudplen: %d", ret); - return ret; - } - return 0; -} - -static __always_inline int adjust_ipv4_len(struct __sk_buff *skb, - __u32 link_h_len, __u16 oldlen, - __u16 len_diff) { - if (unlikely(!len_diff)) { - return 0; - } - - // Boundary check. - if (len_diff > 0) { - if (unlikely(bpf_ntohs(oldlen) + len_diff < len_diff)) { // overflow - bpf_printk("ip length overflow"); - return -EINVAL; - } - } else { - if (unlikely((__s32)bpf_ntohs(oldlen) + len_diff < 0)) { // not enough - bpf_printk("ip length not enough"); - return -EINVAL; - } - } - __be16 newlen = bpf_htons(bpf_ntohs(oldlen) + len_diff); - - // Calculate checksum and store the new value. 
- int ret; - if ((ret = bpf_l3_csum_replace(skb, IPV4_CSUM_OFF(link_h_len), oldlen, newlen, - sizeof(oldlen)))) { - bpf_printk("bpf_l3_csum_replace newudplen: %d", ret); - return ret; - } - if ((ret = bpf_skb_store_bytes(skb, - link_h_len + offsetof(struct iphdr, tot_len), - &newlen, sizeof(oldlen), 0))) { - bpf_printk("bpf_skb_store_bytes newiplen: %d", ret); - return ret; - } - return 0; -} - -static __always_inline int -decap_after_udp_hdr(struct __sk_buff *skb, __u32 link_h_len, __u8 ihl, - __be16 ipv4hdr_tot_len, void *to, __u32 decap_hdrlen, - bool (*prevent_pop)(void *to), bool disable_l4_checksum) { - if (unlikely(decap_hdrlen % 4 != 0)) { - bpf_printk("encap_after_udp_hdr: unexpected decap_hdrlen value %u :must " - "be a multiple of 4", - decap_hdrlen); - return -EINVAL; - } - int ret = 0; - long ip_off = link_h_len; - // Calculate offsets using add instead of subtract to avoid verifier problems. - long ipp_len = ihl * 4; - - // Must check lower boundary for packet offset (and set the type of the - // variables to signed long). - if (skb->data + ip_off + ipp_len > skb->data_end) { - return -EINVAL; - } - - // Backup for further use. - struct udphdr reserved_udphdr; - if ((ret = bpf_skb_load_bytes(skb, ip_off + ipp_len, &reserved_udphdr, - sizeof(struct udphdr)))) { - bpf_printk("bpf_skb_load_bytes: %d", ret); - return ret; - } - - // Load the hdr to decap. - if ((ret = bpf_skb_load_bytes(skb, ip_off + ipp_len + sizeof(struct udphdr), - to, decap_hdrlen))) { - bpf_printk("bpf_skb_load_bytes decap_hdr: %d", ret); - return ret; - } - - // Move the udphdr to the front of the real UDP payload. - if ((ret = - bpf_skb_store_bytes(skb, ip_off + ipp_len + decap_hdrlen, - &reserved_udphdr, sizeof(reserved_udphdr), 0))) { - bpf_printk("bpf_skb_store_bytes reserved_udphdr: %d", ret); - return ret; - } - - if (prevent_pop == NULL || !prevent_pop(to)) { - // Adjust room to decap the header. 
- if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET, - BPF_F_ADJ_ROOM_NO_CSUM_RESET))) { - bpf_printk("UDP ADJUST ROOM(decap): %d", ret); - return ret; - } - - // Rewrite ip len. - if (skb->protocol == bpf_htons(ETH_P_IP)) { - if ((ret = adjust_ipv4_len(skb, link_h_len, ipv4hdr_tot_len, - -decap_hdrlen))) { - bpf_printk("adjust_ip_len: %d", ret); - return ret; - } - } - - // Rewrite udp len. - if ((ret = adjust_udp_len(skb, link_h_len, reserved_udphdr.len, ihl, - -decap_hdrlen, disable_l4_checksum))) { - bpf_printk("adjust_udp_len: %d", ret); - return ret; - } - - if (!disable_l4_checksum) { - // Rewrite udp checksum. - __u32 udp_csum_off = l4_checksum_off(link_h_len, IPPROTO_UDP, ihl); - __s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0); - if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm, - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace 2: %d", ret); - return ret; - } - } - } - return 0; -} - // Do not use __always_inline here because this function is too heavy. // low -> high: outbound(8b) mark(32b) unused(23b) sign(1b) static __s64 __attribute__((noinline)) @@ -1134,11 +811,6 @@ route(const __u32 flag[8], const void *l4hdr, const __be32 saddr[4], #undef _dscp } -static bool __always_inline is_not_to_lan(void *_ori_src) { - struct dst_routing_result *ori_src = _ori_src; - return ori_src->routing_result.outbound == IS_WAN; -} - static __always_inline __u32 get_link_h_len(__u32 ifindex, volatile __u32 *link_h_len) { __u32 *plink_h_len = bpf_map_lookup_elem(&linklen_map, &ifindex); @@ -1188,78 +860,6 @@ assign_socket(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, return assign_socket_udp(skb, tuple, len); } -// SNAT for UDP packet. 
-SEC("tc/egress") -int tproxy_lan_egress(struct __sk_buff *skb) { - if (skb->ingress_ifindex != NOWHERE_IFINDEX) { - return TC_ACT_PIPE; - } - struct ethhdr ethh; - struct iphdr iph; - struct ipv6hdr ipv6h; - struct icmp6hdr icmp6h; - struct tcphdr tcph; - struct udphdr udph; - __u8 ihl; - __u8 l4proto; - __u32 link_h_len; - if (get_link_h_len(skb->ifindex, &link_h_len)) { - return TC_ACT_OK; - } - int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l4proto); - if (ret) { - bpf_printk("parse_transport: %d", ret); - return TC_ACT_OK; - } - switch (l4proto) { - case IPPROTO_ICMPV6: - if (icmp6h.icmp6_type == 137) { - // REDIRECT (NDP) - return TC_ACT_SHOT; - } - return TC_ACT_PIPE; - case IPPROTO_UDP: - break; - default: - return TC_ACT_PIPE; - } - - __be16 tproxy_port = PARAM.tproxy_port; - if (!tproxy_port) { - return TC_ACT_PIPE; - } - struct tuples tuples; - get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto); - if (tproxy_port != tuples.five.sport) { - return TC_ACT_PIPE; - } - - struct dst_routing_result ori_src; - if ((ret = decap_after_udp_hdr( - skb, link_h_len, ihl, - skb->protocol == bpf_htons(ETH_P_IP) ? iph.tot_len : 0, &ori_src, - sizeof(ori_src), is_not_to_lan, true))) { - return TC_ACT_SHOT; - } - if (is_not_to_lan(&ori_src)) { - return TC_ACT_PIPE; - } - if ((ret = rewrite_ip(skb, link_h_len, l4proto, ihl, - tuples.five.sip.u6_addr32, ori_src.ip, false, true))) { - return TC_ACT_SHOT; - } - if ((ret = rewrite_port(skb, link_h_len, l4proto, ihl, tuples.five.sport, - ori_src.port, false, true))) { - return TC_ACT_SHOT; - } - disable_l4_checksum(skb, link_h_len, l4proto, ihl); - // bpf_printk("from %pI6 to %pI6", tuples.five.sip, ori_src.ip); - // bpf_printk("from %u to %u", bpf_ntohs(tuples.five.sport), - // bpf_ntohs(ori_src.port)); - return TC_ACT_OK; -} - SEC("tc/ingress") int tproxy_lan_ingress(struct __sk_buff *skb) { struct ethhdr ethh;