From 501f665a5a4b3eafa75f020ab77c1d62f7840172 Mon Sep 17 00:00:00 2001 From: Paolo Valerio Date: Wed, 12 Jul 2023 11:16:43 +0200 Subject: [PATCH] conntrack: Extract l4 information for SCTP. Since a27d70a89 ("conntrack: add generic IP protocol support") all the unrecognized IP protocols get handled using ct_proto_other ops and are managed as L3 using 3 tuples. This patch stores L4 information for SCTP in the conn_key so that multiple conn instances, instead of one with ports zeroed, will be created when there are multiple SCTP connections between two hosts. It also performs crc32c check when not offloaded, and adds SCTP to pat_enabled. With this patch, given two SCTP associations between two hosts, tracking the connection will result in: sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=55884,dport=5201), reply=(src=10.1.1.1,dst=10.1.1.2,sport=5201,dport=12345),zone=1 sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=59874,dport=5202), reply=(src=10.1.1.1,dst=10.1.1.2,sport=5202,dport=12346),zone=1 instead of: sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=0,dport=0), reply=(src=10.1.1.1,dst=10.1.1.2,sport=0,dport=0),zone=1 Signed-off-by: Paolo Valerio Signed-off-by: Ilya Maximets --- NEWS | 1 + lib/conntrack.c | 86 +++++++++++++++++++++++++++++++- lib/packets.h | 11 ++++ tests/system-kmod-macros.at | 11 ++++ tests/system-traffic.at | 73 +++++++++++++++++++++++++++ tests/system-userspace-macros.at | 7 +++ 6 files changed, 188 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index eedaad07b13..01e8219bfa5 100644 --- a/NEWS +++ b/NEWS @@ -42,6 +42,7 @@ Post-v3.1.0 - SRv6 Tunnel Protocol * Added support for userspace datapath (only). - Userspace datapath: + * Connection tracking now supports extraction of SCTP L4 information. * Implementation of OpenFlow meters is now lockless allowing for better multi-thread scalability. 
* IP and L4 checksum offload support is now enabled by default for diff --git a/lib/conntrack.c b/lib/conntrack.c index 4375c03e2b8..5f1176d333f 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -27,6 +27,7 @@ #include "conntrack-private.h" #include "conntrack-tp.h" #include "coverage.h" +#include "crc32c.h" #include "csum.h" #include "ct-dpif.h" #include "dp-packet.h" @@ -41,6 +42,7 @@ #include "random.h" #include "rculist.h" #include "timeval.h" +#include "unaligned.h" VLOG_DEFINE_THIS_MODULE(conntrack); @@ -771,6 +773,8 @@ pat_packet(struct dp_packet *pkt, const struct conn_key *key) packet_set_tcp_port(pkt, key->dst.port, key->src.port); } else if (key->nw_proto == IPPROTO_UDP) { packet_set_udp_port(pkt, key->dst.port, key->src.port); + } else if (key->nw_proto == IPPROTO_SCTP) { + packet_set_sctp_port(pkt, key->dst.port, key->src.port); } } @@ -1675,6 +1679,26 @@ checksum_valid(const struct conn_key *key, const void *data, size_t size, return valid; } +static inline bool +sctp_checksum_valid(const void *data, size_t size) +{ + struct sctp_header *sctp = (struct sctp_header *) data; + ovs_be32 rcvd_csum, csum; + bool ret; + + rcvd_csum = get_16aligned_be32(&sctp->sctp_csum); + put_16aligned_be32(&sctp->sctp_csum, 0); + csum = crc32c(data, size); + put_16aligned_be32(&sctp->sctp_csum, rcvd_csum); + + ret = (rcvd_csum == csum); + if (!ret) { + COVERAGE_INC(conntrack_l4csum_err); + } + + return ret; +} + static inline bool check_l4_tcp(const struct conn_key *key, const void *data, size_t size, const void *l3, bool validate_checksum) @@ -1711,6 +1735,47 @@ check_l4_udp(const struct conn_key *key, const void *data, size_t size, || (validate_checksum ? 
checksum_valid(key, data, size, l3) : true); } +static inline bool +sctp_check_len(const struct sctp_header *sh, size_t size) +{ + const struct sctp_chunk_header *sch; + size_t next; + + if (size < SCTP_HEADER_LEN) { + return false; + } + + /* rfc4960: Chunks (including Type, Length, and Value fields) are padded + * out by the sender with all zero bytes to be a multiple of 4 bytes long. + */ + for (next = sizeof(struct sctp_header), + sch = SCTP_NEXT_CHUNK(sh, next); + next < size; + next += ROUND_UP(ntohs(sch->length), 4), + sch = SCTP_NEXT_CHUNK(sh, next)) { + /* rfc4960: This value represents the size of the chunk in bytes, + * including the Chunk Type, Chunk Flags, Chunk Length, and Chunk Value + * fields. + * Therefore, if the Chunk Value field is zero-length, the Length + * field will be set to 4. */ + if (ntohs(sch->length) < sizeof *sch) { + return false; + } + } + + return (next == size); +} + +static inline bool +check_l4_sctp(const void *data, size_t size, bool validate_checksum) +{ + if (OVS_UNLIKELY(!sctp_check_len(data, size))) { + return false; + } + + return validate_checksum ? sctp_checksum_valid(data, size) : true; +} + static inline bool check_l4_icmp(const void *data, size_t size, bool validate_checksum) { @@ -1761,6 +1826,21 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size, return key->src.port && key->dst.port; } +static inline bool +extract_l4_sctp(struct conn_key *key, const void *data, size_t size, + size_t *chk_len) +{ + if (OVS_UNLIKELY(size < (chk_len ? 
*chk_len : SCTP_HEADER_LEN))) { + return false; + } + + const struct sctp_header *sctp = data; + key->src.port = sctp->sctp_src; + key->dst.port = sctp->sctp_dst; + + return key->src.port && key->dst.port; +} + static inline bool extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, const void *l3, bool validate_checksum, size_t *chk_len); @@ -1976,6 +2056,9 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, return (!related || check_l4_udp(key, data, size, l3, validate_checksum)) && extract_l4_udp(key, data, size, chk_len); + } else if (key->nw_proto == IPPROTO_SCTP) { + return (!related || check_l4_sctp(data, size, validate_checksum)) + && extract_l4_sctp(key, data, size, chk_len); } else if (key->dl_type == htons(ETH_TYPE_IP) && key->nw_proto == IPPROTO_ICMP) { return (!related || check_l4_icmp(data, size, validate_checksum)) @@ -2374,7 +2457,8 @@ nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, uint32_t hash = nat_range_hash(conn, ct->hash_basis, nat_info); union ct_addr min_addr = {0}, max_addr = {0}, addr = {0}; bool pat_proto = conn->key.nw_proto == IPPROTO_TCP || - conn->key.nw_proto == IPPROTO_UDP; + conn->key.nw_proto == IPPROTO_UDP || + conn->key.nw_proto == IPPROTO_SCTP; uint16_t min_dport, max_dport, curr_dport; uint16_t min_sport, max_sport, curr_sport; diff --git a/lib/packets.h b/lib/packets.h index 200b25cf012..12245b7649a 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -854,6 +854,17 @@ struct sctp_header { }; BUILD_ASSERT_DECL(SCTP_HEADER_LEN == sizeof(struct sctp_header)); +#define SCTP_CHUNK_HEADER_LEN 4 +struct sctp_chunk_header { + uint8_t type; + uint8_t flags; + ovs_be16 length; +}; +BUILD_ASSERT_DECL(SCTP_CHUNK_HEADER_LEN == sizeof(struct sctp_chunk_header)); + +#define SCTP_NEXT_CHUNK(sh, off) \ + ALIGNED_CAST(struct sctp_chunk_header *, (uint8_t *) sh + off) + #define UDP_HEADER_LEN 8 struct udp_header { ovs_be16 udp_src; diff --git a/tests/system-kmod-macros.at 
b/tests/system-kmod-macros.at index 81601390ddb..5203b1df808 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -112,6 +112,17 @@ m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], AT_SKIP_IF([test "$IS_WIN32" = "yes"]) ]) +# CHECK_CONNTRACK_SCTP() +# +# Perform requirements checks for running conntrack SCTP. The kernel +# optionally supports nf proto sctp. +# +m4_define([CHECK_CONNTRACK_SCTP], +[ + AT_SKIP_IF([test "$IS_WIN32" = "yes"]) + AT_SKIP_IF([! test -e /proc/sys/net/netfilter/nf_conntrack_sctp_timeout_closed]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. diff --git a/tests/system-traffic.at b/tests/system-traffic.at index a05ca311ca8..9f07f45a36a 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -4701,6 +4701,79 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - SCTP SNAT with port range]) +CHECK_CONNTRACK() +CHECK_CONNTRACK_SCTP() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") +NS_CHECK_EXEC([at_ns0], [ip link set dev p0 address e6:66:c1:11:11:11]) +NS_CHECK_EXEC([at_ns1], [ip link set dev p1 address e6:66:c1:22:22:22]) + +dnl Allow any traffic from ns0->ns1. Only allow return traffic from ns1->ns0. 
+AT_DATA([flows.txt], [dnl +table=0,priority=100,in_port=1,sctp,action=ct(commit,zone=1,nat(src=10.1.1.240:34567)),controller +table=0,priority=100,in_port=2,ct_state=-trk,sctp,tp_dst=34567,action=ct(table=1,zone=1,nat) +table=0,priority=0,action=drop +table=1,priority=100,in_port=2,ct_state=+trk+rpl,ct_zone=1,sctp,action=controller +table=1,priority=0,action=drop +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +AT_CAPTURE_FILE([ofctl_monitor.log]) +AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log]) + +dnl Simple SCTP association local and remote single homing +dnl Send INIT. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c111111108004502004400004000408424300a0101010a010102d6b9303900000000c5cc426b0100002470e18ccc0001a000000affff7ae1c142000c00060005000080000004c0000004 actions=resubmit(,0)"]) +dnl Reply INIT_ACK. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=e666c1111111e666c122222208004502012400004000408422610a0101020a0101f03039870770e18ccc97abd49a0200010425bb9dfa0001a000000a000abb90fba5000700e827a048cd1474b111490710816ec95cfc501126b200000000000000000000000000000000fa9dbb25cc8ce17000000000000000002b953b0e1d346d160a000a00a5fb90bb020087070a0101f00000000000000000000000000000000000000000393001000000000080020024fbb82eae13af8d70329bc42bb7cd7e6458d60ff1a181e9b41167c2cab54471bf0000000000000000000000000000000000000000000000000000000000000000000000000100002470e18ccc0001a000000affff7ae1c142000c00060005000080000004c00000040000000000000000000000000000000080000004c0000004 actions=resubmit(,0)"]) +dnl Send COOKIE_ECHO. 
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c1111111080045020108000040004084236c0a0101010a010102d6b9303925bb9dfaf2c860300a0000e827a048cd1474b111490710816ec95cfc501126b200000000000000000000000000000000fa9dbb25cc8ce17000000000000000002b953b0e1d346d160a000a00a5fb90bb020087070a0101f00000000000000000000000000000000000000000393001000000000080020024fbb82eae13af8d70329bc42bb7cd7e6458d60ff1a181e9b41167c2cab54471bf0000000000000000000000000000000000000000000000000000000000000000000000000100002470e18ccc0001a000000affff7ae1c142000c00060005000080000004c000000400000000000000000000000000000000 actions=resubmit(,0)"]) +dnl Reply COOKIE_ACK. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=e666c1111111e666c122222208004502002400004000408423610a0101020a0101f03039870770e18ccc0391398b0b000004 actions=resubmit(,0)"]) +dnl Send DATA. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c1111111080045020034000140004084243f0a0101010a010102d6b9303925bb9dfabc366345000300147ae1c1420000000000000000666f6f0a actions=resubmit(,0)"]) +dnl Reply SACK. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=e666c1111111e666c122222208004502003042c840004084e08c0a0101020a0101f03039870770e18ccc6a990714030000107ae1c14200019ffc00000000 actions=resubmit(,0)"]) +dnl ABORT the association. 
The association cannot be gracefully terminated because of +dnl a small timeouts in SHUTDOWN_SENT in the kernel datapath that would make the test unreliable +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c111111108004500002400010000408464510a0101010a010102d6b9303925bb9dfae3b82c3806000004 actions=resubmit(,0)"]) + +AT_CHECK([ovs-appctl revalidator/purge], [0]) + +OVS_APP_EXIT_AND_WAIT([ovs-ofctl]) + +AT_CHECK([cat ofctl_monitor.log], [0], [dnl +NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=82 in_port=1 (via action) data_len=82 (unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:9670267b +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=306 ct_state=est|rpl|trk|dnat,ct_zone=1,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=132,ct_tp_src=54969,ct_tp_dst=12345,ip,in_port=2 (via action) data_len=306 (unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:22:22:22,dl_dst=e6:66:c1:11:11:11,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=12345,tp_dst=54969 sctp_csum:49864886 +NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=278 in_port=1 (via action) data_len=278 (unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:8c816918 +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=50 ct_state=est|rpl|trk|dnat,ct_zone=1,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=132,ct_tp_src=54969,ct_tp_dst=12345,ip,in_port=2 (via action) data_len=50 (unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:22:22:22,dl_dst=e6:66:c1:11:11:11,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=12345,tp_dst=54969 sctp_csum:ef4749fc +NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=66 in_port=1 (via action) data_len=66 
(unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:eb2b2c17 +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=62 ct_state=est|rpl|trk|dnat,ct_zone=1,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=132,ct_tp_src=54969,ct_tp_dst=12345,ip,in_port=2 (via action) data_len=62 (unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:22:22:22,dl_dst=e6:66:c1:11:11:11,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=12345,tp_dst=54969 sctp_csum:9b67e853 +NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=50 in_port=1 (via action) data_len=50 (unbuffered) +sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:4bb49f65 +]) + +dnl Check the ct entry +dnl protoinfo has to be removed in order to normalize the current difference between user and kernel output +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sed 's/,protoinfo=.*$//' ], [], [dnl +sctp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=10.1.1.2,dst=10.1.1.240,sport=,dport=),zone=1 +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + dnl Check kernel datapath to make sure conntrack fills in L3 and L4 dnl protocol information AT_SETUP([conntrack - fragment reassembly with L3 L4 protocol information]) diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index 73e0e843b9f..d9b5b7e4c4d 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -106,6 +106,13 @@ m4_define([CHECK_CONNTRACK_NAT]) # m4_define([CHECK_CONNTRACK_ZEROIP_SNAT]) +# CHECK_CONNTRACK_SCTP() +# +# Perform requirements checks for running conntrack SCTP. The userspace +# datapath has no dependency, so no check is required. 
+# +m4_define([CHECK_CONNTRACK_SCTP]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests.