Skip to content

Commit

Permalink
conntrack: Extract l4 information for SCTP.
Browse files Browse the repository at this point in the history
Since a27d70a ("conntrack: add generic IP protocol support") all
the unrecognized IP protocols get handled using ct_proto_other ops
and are managed as L3 using 3 tuples.

This patch stores L4 information for SCTP in the conn_key so that
multiple conn instances, instead of one with ports zeroed, will be
created when there are multiple SCTP connections between two hosts.
It also performs crc32c check when not offloaded, and adds SCTP to
pat_enabled.

With this patch, given two SCTP associations between two hosts,
tracking the connections will result in:

sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=55884,dport=5201),
    reply=(src=10.1.1.1,dst=10.1.1.2,sport=5201,dport=12345),zone=1
sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=59874,dport=5202),
    reply=(src=10.1.1.1,dst=10.1.1.2,sport=5202,dport=12346),zone=1

instead of:

sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=0,dport=0),
    reply=(src=10.1.1.1,dst=10.1.1.2,sport=0,dport=0),zone=1

Signed-off-by: Paolo Valerio <[email protected]>
Signed-off-by: Ilya Maximets <[email protected]>
  • Loading branch information
vlrpl authored and igsilya committed Jul 13, 2023
1 parent 62f5aa4 commit 501f665
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 1 deletion.
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Post-v3.1.0
- SRv6 Tunnel Protocol
* Added support for userspace datapath (only).
- Userspace datapath:
* Connection tracking now supports extraction of SCTP L4 information.
* Implementation of OpenFlow meters is now lockless allowing for better
multi-thread scalability.
* IP and L4 checksum offload support is now enabled by default for
Expand Down
86 changes: 85 additions & 1 deletion lib/conntrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "conntrack-private.h"
#include "conntrack-tp.h"
#include "coverage.h"
#include "crc32c.h"
#include "csum.h"
#include "ct-dpif.h"
#include "dp-packet.h"
Expand All @@ -41,6 +42,7 @@
#include "random.h"
#include "rculist.h"
#include "timeval.h"
#include "unaligned.h"

VLOG_DEFINE_THIS_MODULE(conntrack);

Expand Down Expand Up @@ -771,6 +773,8 @@ pat_packet(struct dp_packet *pkt, const struct conn_key *key)
packet_set_tcp_port(pkt, key->dst.port, key->src.port);
} else if (key->nw_proto == IPPROTO_UDP) {
packet_set_udp_port(pkt, key->dst.port, key->src.port);
} else if (key->nw_proto == IPPROTO_SCTP) {
packet_set_sctp_port(pkt, key->dst.port, key->src.port);
}
}

Expand Down Expand Up @@ -1675,6 +1679,26 @@ checksum_valid(const struct conn_key *key, const void *data, size_t size,
return valid;
}

/* Checks the checksum of the SCTP packet that starts at 'data' and is 'size'
 * bytes long.
 *
 * The checksum field of the packet is temporarily overwritten with zero while
 * the CRC32c is computed over the whole packet, and the received value is
 * written back before returning.
 *
 * Returns true if the received checksum equals the computed one.  Otherwise
 * bumps the 'conntrack_l4csum_err' coverage counter and returns false. */
static inline bool
sctp_checksum_valid(const void *data, size_t size)
{
    struct sctp_header *sh = (struct sctp_header *) data;
    ovs_be32 received = get_16aligned_be32(&sh->sctp_csum);
    ovs_be32 computed;

    /* Compute the CRC with the checksum field zeroed, then restore it. */
    put_16aligned_be32(&sh->sctp_csum, 0);
    computed = crc32c(data, size);
    put_16aligned_be32(&sh->sctp_csum, received);

    if (received != computed) {
        COVERAGE_INC(conntrack_l4csum_err);
        return false;
    }

    return true;
}

static inline bool
check_l4_tcp(const struct conn_key *key, const void *data, size_t size,
const void *l3, bool validate_checksum)
Expand Down Expand Up @@ -1711,6 +1735,47 @@ check_l4_udp(const struct conn_key *key, const void *data, size_t size,
|| (validate_checksum ? checksum_valid(key, data, size, l3) : true);
}

/* Validates the chunk layout of the SCTP packet at 'sh', which is 'size'
 * bytes long.
 *
 * Walks every chunk that follows the common header and returns true only if:
 *   - the packet is at least SCTP_HEADER_LEN bytes long,
 *   - every chunk header fits entirely inside the packet,
 *   - every chunk length is at least the size of a chunk header, and
 *   - the chunks, each padded to a multiple of 4 bytes, end exactly at
 *     'size'.
 *
 * A packet that carries only the common header (no chunks) is accepted. */
static inline bool
sctp_check_len(const struct sctp_header *sh, size_t size)
{
    size_t next = sizeof(struct sctp_header);

    if (size < SCTP_HEADER_LEN) {
        return false;
    }

    while (next < size) {
        const struct sctp_chunk_header *sch;
        uint16_t chunk_len;

        /* Fewer bytes left than a chunk header: the packet is truncated.
         * Bail out before reading the length field, which would otherwise
         * be fetched from beyond the end of the buffer. */
        if (size - next < sizeof *sch) {
            return false;
        }

        sch = SCTP_NEXT_CHUNK(sh, next);
        chunk_len = ntohs(sch->length);

        /* rfc4960: This value represents the size of the chunk in bytes,
         * including the Chunk Type, Chunk Flags, Chunk Length, and Chunk Value
         * fields.
         * Therefore, if the Chunk Value field is zero-length, the Length
         * field will be set to 4. */
        if (chunk_len < sizeof *sch) {
            return false;
        }

        /* rfc4960: Chunks (including Type, Length, and Value fields) are
         * padded out by the sender with all zero bytes to be a multiple of
         * 4 bytes long. */
        next += ROUND_UP(chunk_len, 4);
    }

    return next == size;
}

/* Sanity-checks the SCTP packet at 'data', 'size' bytes long.
 *
 * The chunk layout is always validated with sctp_check_len().  The checksum
 * is verified with sctp_checksum_valid() only when 'validate_checksum' is
 * true.
 *
 * Returns true if all the checks that were performed passed. */
static inline bool
check_l4_sctp(const void *data, size_t size, bool validate_checksum)
{
    bool len_ok = sctp_check_len(data, size);

    if (OVS_UNLIKELY(!len_ok)) {
        return false;
    }

    if (!validate_checksum) {
        return true;
    }

    return sctp_checksum_valid(data, size);
}

static inline bool
check_l4_icmp(const void *data, size_t size, bool validate_checksum)
{
Expand Down Expand Up @@ -1761,6 +1826,21 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size,
return key->src.port && key->dst.port;
}

/* Copies the SCTP source and destination ports from the packet at 'data'
 * ('size' bytes long) into 'key'.
 *
 * The packet must be at least '*chk_len' bytes long when 'chk_len' is
 * nonnull, or at least SCTP_HEADER_LEN bytes long otherwise.  If it is
 * shorter, 'key' is left untouched and false is returned.
 *
 * Returns true only if both extracted ports are nonzero. */
static inline bool
extract_l4_sctp(struct conn_key *key, const void *data, size_t size,
                size_t *chk_len)
{
    const struct sctp_header *sh = data;
    size_t min_len = chk_len ? *chk_len : SCTP_HEADER_LEN;

    if (OVS_UNLIKELY(size < min_len)) {
        return false;
    }

    key->src.port = sh->sctp_src;
    key->dst.port = sh->sctp_dst;

    if (!key->src.port || !key->dst.port) {
        return false;
    }

    return true;
}

static inline bool extract_l4(struct conn_key *key, const void *data,
size_t size, bool *related, const void *l3,
bool validate_checksum, size_t *chk_len);
Expand Down Expand Up @@ -1976,6 +2056,9 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
return (!related || check_l4_udp(key, data, size, l3,
validate_checksum))
&& extract_l4_udp(key, data, size, chk_len);
} else if (key->nw_proto == IPPROTO_SCTP) {
return (!related || check_l4_sctp(data, size, validate_checksum))
&& extract_l4_sctp(key, data, size, chk_len);
} else if (key->dl_type == htons(ETH_TYPE_IP)
&& key->nw_proto == IPPROTO_ICMP) {
return (!related || check_l4_icmp(data, size, validate_checksum))
Expand Down Expand Up @@ -2374,7 +2457,8 @@ nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn,
uint32_t hash = nat_range_hash(conn, ct->hash_basis, nat_info);
union ct_addr min_addr = {0}, max_addr = {0}, addr = {0};
bool pat_proto = conn->key.nw_proto == IPPROTO_TCP ||
conn->key.nw_proto == IPPROTO_UDP;
conn->key.nw_proto == IPPROTO_UDP ||
conn->key.nw_proto == IPPROTO_SCTP;
uint16_t min_dport, max_dport, curr_dport;
uint16_t min_sport, max_sport, curr_sport;

Expand Down
11 changes: 11 additions & 0 deletions lib/packets.h
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,17 @@ struct sctp_header {
};
BUILD_ASSERT_DECL(SCTP_HEADER_LEN == sizeof(struct sctp_header));

/* Header that starts every SCTP chunk.  Its size is fixed at
 * SCTP_CHUNK_HEADER_LEN bytes, which the build-time assertion below
 * enforces. */
#define SCTP_CHUNK_HEADER_LEN 4
struct sctp_chunk_header {
uint8_t type;       /* Chunk type. */
uint8_t flags;      /* Chunk flags. */
ovs_be16 length;    /* Chunk size in bytes, in network byte order, including
                     * this header and the chunk value. */
};
BUILD_ASSERT_DECL(SCTP_CHUNK_HEADER_LEN == sizeof(struct sctp_chunk_header));

/* Returns a pointer to the SCTP chunk header located 'off' bytes past the
 * start of the SCTP header 'sh'.
 *
 * Both arguments are parenthesized so that expression arguments bind as the
 * caller intended.  The macro does no bounds checking: the caller must make
 * sure that a whole chunk header is available at that offset before
 * dereferencing the result. */
#define SCTP_NEXT_CHUNK(sh, off) \
    ALIGNED_CAST(struct sctp_chunk_header *, (uint8_t *) (sh) + (off))

#define UDP_HEADER_LEN 8
struct udp_header {
ovs_be16 udp_src;
Expand Down
11 changes: 11 additions & 0 deletions tests/system-kmod-macros.at
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,17 @@ m4_define([CHECK_CONNTRACK_ZEROIP_SNAT],
AT_SKIP_IF([test "$IS_WIN32" = "yes"])
])

# CHECK_CONNTRACK_SCTP()
#
# Perform requirements checks for running conntrack SCTP tests.  The kernel
# only optionally supports the netfilter SCTP protocol tracker, so the test
# is skipped when the nf_conntrack_sctp_timeout_closed sysctl entry does not
# exist.  The test is also skipped on Windows.
#
m4_define([CHECK_CONNTRACK_SCTP],
[
AT_SKIP_IF([test "$IS_WIN32" = "yes"])
AT_SKIP_IF([! test -e /proc/sys/net/netfilter/nf_conntrack_sctp_timeout_closed])
])

# CHECK_CONNTRACK_TIMEOUT()
#
# Perform requirements checks for running conntrack customized timeout tests.
Expand Down
73 changes: 73 additions & 0 deletions tests/system-traffic.at
Original file line number Diff line number Diff line change
Expand Up @@ -4701,6 +4701,79 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

AT_SETUP([conntrack - SCTP SNAT with port range])
CHECK_CONNTRACK()
CHECK_CONNTRACK_SCTP()
OVS_TRAFFIC_VSWITCHD_START()

ADD_NAMESPACES(at_ns0, at_ns1)

ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
NS_CHECK_EXEC([at_ns0], [ip link set dev p0 address e6:66:c1:11:11:11])
NS_CHECK_EXEC([at_ns1], [ip link set dev p1 address e6:66:c1:22:22:22])

dnl Allow any traffic from ns0->ns1. Only allow return traffic from ns1->ns0.
AT_DATA([flows.txt], [dnl
table=0,priority=100,in_port=1,sctp,action=ct(commit,zone=1,nat(src=10.1.1.240:34567)),controller
table=0,priority=100,in_port=2,ct_state=-trk,sctp,tp_dst=34567,action=ct(table=1,zone=1,nat)
table=0,priority=0,action=drop
table=1,priority=100,in_port=2,ct_state=+trk+rpl,ct_zone=1,sctp,action=controller
table=1,priority=0,action=drop
])

AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])

AT_CAPTURE_FILE([ofctl_monitor.log])
AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log])

dnl Simple SCTP association local and remote single homing
dnl Send INIT.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c111111108004502004400004000408424300a0101010a010102d6b9303900000000c5cc426b0100002470e18ccc0001a000000affff7ae1c142000c00060005000080000004c0000004 actions=resubmit(,0)"])
dnl Reply INIT_ACK.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=e666c1111111e666c122222208004502012400004000408422610a0101020a0101f03039870770e18ccc97abd49a0200010425bb9dfa0001a000000a000abb90fba5000700e827a048cd1474b111490710816ec95cfc501126b200000000000000000000000000000000fa9dbb25cc8ce17000000000000000002b953b0e1d346d160a000a00a5fb90bb020087070a0101f00000000000000000000000000000000000000000393001000000000080020024fbb82eae13af8d70329bc42bb7cd7e6458d60ff1a181e9b41167c2cab54471bf0000000000000000000000000000000000000000000000000000000000000000000000000100002470e18ccc0001a000000affff7ae1c142000c00060005000080000004c00000040000000000000000000000000000000080000004c0000004 actions=resubmit(,0)"])
dnl Send COOKIE_ECHO.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c1111111080045020108000040004084236c0a0101010a010102d6b9303925bb9dfaf2c860300a0000e827a048cd1474b111490710816ec95cfc501126b200000000000000000000000000000000fa9dbb25cc8ce17000000000000000002b953b0e1d346d160a000a00a5fb90bb020087070a0101f00000000000000000000000000000000000000000393001000000000080020024fbb82eae13af8d70329bc42bb7cd7e6458d60ff1a181e9b41167c2cab54471bf0000000000000000000000000000000000000000000000000000000000000000000000000100002470e18ccc0001a000000affff7ae1c142000c00060005000080000004c000000400000000000000000000000000000000 actions=resubmit(,0)"])
dnl Reply COOKIE_ACK.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=e666c1111111e666c122222208004502002400004000408423610a0101020a0101f03039870770e18ccc0391398b0b000004 actions=resubmit(,0)"])
dnl Send DATA.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c1111111080045020034000140004084243f0a0101010a010102d6b9303925bb9dfabc366345000300147ae1c1420000000000000000666f6f0a actions=resubmit(,0)"])
dnl Reply SACK.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=e666c1111111e666c122222208004502003042c840004084e08c0a0101020a0101f03039870770e18ccc6a990714030000107ae1c14200019ffc00000000 actions=resubmit(,0)"])
dnl ABORT the association.  The association cannot be gracefully terminated because
dnl the short SHUTDOWN_SENT timeout in the kernel datapath would make the test unreliable.
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=e666c1222222e666c111111108004500002400010000408464510a0101010a010102d6b9303925bb9dfae3b82c3806000004 actions=resubmit(,0)"])

AT_CHECK([ovs-appctl revalidator/purge], [0])

OVS_APP_EXIT_AND_WAIT([ovs-ofctl])

AT_CHECK([cat ofctl_monitor.log], [0], [dnl
NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=82 in_port=1 (via action) data_len=82 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:9670267b
NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=306 ct_state=est|rpl|trk|dnat,ct_zone=1,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=132,ct_tp_src=54969,ct_tp_dst=12345,ip,in_port=2 (via action) data_len=306 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:22:22:22,dl_dst=e6:66:c1:11:11:11,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=12345,tp_dst=54969 sctp_csum:49864886
NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=278 in_port=1 (via action) data_len=278 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:8c816918
NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=50 ct_state=est|rpl|trk|dnat,ct_zone=1,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=132,ct_tp_src=54969,ct_tp_dst=12345,ip,in_port=2 (via action) data_len=50 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:22:22:22,dl_dst=e6:66:c1:11:11:11,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=12345,tp_dst=54969 sctp_csum:ef4749fc
NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=66 in_port=1 (via action) data_len=66 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:eb2b2c17
NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=62 ct_state=est|rpl|trk|dnat,ct_zone=1,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=132,ct_tp_src=54969,ct_tp_dst=12345,ip,in_port=2 (via action) data_len=62 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:22:22:22,dl_dst=e6:66:c1:11:11:11,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=2,nw_ttl=64,nw_frag=no,tp_src=12345,tp_dst=54969 sctp_csum:9b67e853
NXT_PACKET_IN2 (xid=0x0): cookie=0x0 total_len=50 in_port=1 (via action) data_len=50 (unbuffered)
sctp,vlan_tci=0x0000,dl_src=e6:66:c1:11:11:11,dl_dst=e6:66:c1:22:22:22,nw_src=10.1.1.240,nw_dst=10.1.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,nw_frag=no,tp_src=34567,tp_dst=12345 sctp_csum:4bb49f65
])

dnl Check the ct entry
dnl protoinfo has to be removed in order to normalize the current difference between user and kernel output
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sed 's/,protoinfo=.*$//' ], [], [dnl
sctp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.240,sport=<cleared>,dport=<cleared>),zone=1
])

OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

dnl Check kernel datapath to make sure conntrack fills in L3 and L4
dnl protocol information
AT_SETUP([conntrack - fragment reassembly with L3 L4 protocol information])
Expand Down
7 changes: 7 additions & 0 deletions tests/system-userspace-macros.at
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ m4_define([CHECK_CONNTRACK_NAT])
#
m4_define([CHECK_CONNTRACK_ZEROIP_SNAT])

# CHECK_CONNTRACK_SCTP()
#
# Perform requirements checks for running conntrack SCTP tests.  The
# userspace datapath has no dependency, so no check is required and the
# macro expands to nothing.
#
m4_define([CHECK_CONNTRACK_SCTP])

# CHECK_CONNTRACK_TIMEOUT()
#
# Perform requirements checks for running conntrack customized timeout tests.
Expand Down

0 comments on commit 501f665

Please sign in to comment.