diff --git a/Cargo.toml b/Cargo.toml index 614a3f932..27b985033 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ once_cell = { version = "1.5.2", optional = true } # libc backend can be selected via adding `--cfg=rustix_use_libc` to # `RUSTFLAGS` or enabling the `use-libc` cargo feature. [target.'cfg(all(not(rustix_use_libc), not(miri), target_os = "linux", target_endian = "little", any(target_arch = "arm", all(target_arch = "aarch64", target_pointer_width = "64"), target_arch = "riscv64", all(rustix_use_experimental_asm, target_arch = "powerpc64"), all(rustix_use_experimental_asm, target_arch = "mips"), all(rustix_use_experimental_asm, target_arch = "mips32r6"), all(rustix_use_experimental_asm, target_arch = "mips64"), all(rustix_use_experimental_asm, target_arch = "mips64r6"), target_arch = "x86", all(target_arch = "x86_64", target_pointer_width = "64"))))'.dependencies] -linux-raw-sys = { version = "0.4.11", default-features = false, features = ["general", "errno", "ioctl", "no_std", "elf"] } +linux-raw-sys = { version = "0.4.12", default-features = false, features = ["general", "errno", "ioctl", "no_std", "elf"] } libc_errno = { package = "errno", version = "0.3.8", default-features = false, optional = true } libc = { version = "0.2.152", default-features = false, features = ["extra_traits"], optional = true } @@ -53,7 +53,7 @@ libc = { version = "0.2.152", default-features = false, features = ["extra_trait # Some syscalls do not have libc wrappers, such as in `io_uring`. For these, # the libc backend uses the linux-raw-sys ABI and `libc::syscall`. 
[target.'cfg(all(any(target_os = "android", target_os = "linux"), any(rustix_use_libc, miri, not(all(target_os = "linux", target_endian = "little", any(target_arch = "arm", all(target_arch = "aarch64", target_pointer_width = "64"), target_arch = "riscv64", all(rustix_use_experimental_asm, target_arch = "powerpc64"), all(rustix_use_experimental_asm, target_arch = "mips"), all(rustix_use_experimental_asm, target_arch = "mips32r6"), all(rustix_use_experimental_asm, target_arch = "mips64"), all(rustix_use_experimental_asm, target_arch = "mips64r6"), target_arch = "x86", all(target_arch = "x86_64", target_pointer_width = "64")))))))'.dependencies] -linux-raw-sys = { version = "0.4.11", default-features = false, features = ["general", "ioctl", "no_std"] } +linux-raw-sys = { version = "0.4.12", default-features = false, features = ["general", "ioctl", "no_std"] } # For the libc backend on Windows, use the Winsock API in windows-sys. [target.'cfg(windows)'.dependencies.windows-sys] @@ -141,7 +141,7 @@ io_uring = ["event", "fs", "net", "linux-raw-sys/io_uring"] mount = [] # Enable `rustix::net::*`. -net = ["linux-raw-sys/net", "linux-raw-sys/netlink", "linux-raw-sys/if_ether"] +net = ["linux-raw-sys/net", "linux-raw-sys/netlink", "linux-raw-sys/if_ether", "linux-raw-sys/xdp"] # Enable `rustix::thread::*`. 
thread = ["linux-raw-sys/prctl"] diff --git a/src/backend/libc/net/msghdr.rs b/src/backend/libc/net/msghdr.rs index dd9b156a5..d212c65a6 100644 --- a/src/backend/libc/net/msghdr.rs +++ b/src/backend/libc/net/msghdr.rs @@ -5,9 +5,13 @@ use crate::backend::c; use crate::backend::conv::{msg_control_len, msg_iov_len}; +#[cfg(target_os = "linux")] +use crate::backend::net::write_sockaddr::encode_sockaddr_xdp; use crate::backend::net::write_sockaddr::{encode_sockaddr_v4, encode_sockaddr_v6}; use crate::io::{self, IoSlice, IoSliceMut}; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; use crate::net::{RecvAncillaryBuffer, SendAncillaryBuffer, SocketAddrV4, SocketAddrV6}; use crate::utils::as_ptr; @@ -124,6 +128,28 @@ pub(crate) fn with_unix_msghdr( }) } +/// Create a message header intended to send with an XDP address. +#[cfg(target_os = "linux")] +pub(crate) fn with_xdp_msghdr( + addr: &SocketAddrXdp, + iov: &[IoSlice<'_>], + control: &mut SendAncillaryBuffer<'_, '_, '_>, + f: impl FnOnce(c::msghdr) -> R, +) -> R { + let encoded = encode_sockaddr_xdp(addr); + + f({ + let mut h = zero_msghdr(); + h.msg_name = as_ptr(&encoded) as _; + h.msg_namelen = size_of::() as _; + h.msg_iov = iov.as_ptr() as _; + h.msg_iovlen = msg_iov_len(iov.len()); + h.msg_control = control.as_control_ptr().cast(); + h.msg_controllen = msg_control_len(control.control_len()); + h + }) +} + /// Create a zero-initialized message header struct value.
#[cfg(all(unix, not(target_os = "redox")))] pub(crate) fn zero_msghdr() -> c::msghdr { diff --git a/src/backend/libc/net/read_sockaddr.rs b/src/backend/libc/net/read_sockaddr.rs index 6da7a50dd..08939d4f8 100644 --- a/src/backend/libc/net/read_sockaddr.rs +++ b/src/backend/libc/net/read_sockaddr.rs @@ -8,6 +8,8 @@ use crate::backend::c; #[cfg(not(windows))] use crate::ffi::CStr; use crate::io; +#[cfg(target_os = "linux")] +use crate::net::xdp::{SockaddrXdpFlags, SocketAddrXdp}; use crate::net::{Ipv4Addr, Ipv6Addr, SocketAddrAny, SocketAddrV4, SocketAddrV6}; use core::mem::size_of; @@ -193,6 +195,19 @@ pub(crate) unsafe fn read_sockaddr( .map(SocketAddrAny::Unix) } } + #[cfg(target_os = "linux")] + c::AF_XDP => { + if len < size_of::() { + return Err(io::Errno::INVAL); + } + let decode = &*storage.cast::(); + Ok(SocketAddrAny::Xdp(SocketAddrXdp::new( + SockaddrXdpFlags::from_bits_retain(decode.sxdp_flags), + decode.sxdp_ifindex, // host byte order, as written by `encode_sockaddr_xdp` + decode.sxdp_queue_id, + decode.sxdp_shared_umem_fd, + ))) + } _ => Err(io::Errno::INVAL), } } @@ -301,6 +316,17 @@ unsafe fn inner_read_sockaddr_os( ) } } + #[cfg(target_os = "linux")] + c::AF_XDP => { + assert!(len >= size_of::()); + let decode = &*storage.cast::(); + SocketAddrAny::Xdp(SocketAddrXdp::new( + SockaddrXdpFlags::from_bits_retain(decode.sxdp_flags), + decode.sxdp_ifindex, // host byte order, as written by `encode_sockaddr_xdp` + decode.sxdp_queue_id, + decode.sxdp_shared_umem_fd, + )) + } other => unimplemented!("{:?}", other), } } diff --git a/src/backend/libc/net/sockopt.rs b/src/backend/libc/net/sockopt.rs index cff2ca288..42bbdada4 100644 --- a/src/backend/libc/net/sockopt.rs +++ b/src/backend/libc/net/sockopt.rs @@ -14,6 +14,8 @@ use crate::fd::BorrowedFd; use crate::ffi::CStr; use crate::io; use crate::net::sockopt::Timeout; +#[cfg(target_os = "linux")] +use crate::net::xdp::{XdpMmapOffsets, XdpOptionsFlags, XdpRingOffset, XdpStatistics, XdpUmemReg}; #[cfg(not(any( apple, windows, @@ -73,6
+75,8 @@ use c::TCP_KEEPALIVE as TCP_KEEPIDLE; use c::TCP_KEEPIDLE; use core::mem::{size_of, MaybeUninit}; use core::time::Duration; +#[cfg(target_os = "linux")] +use linux_raw_sys::xdp::{xdp_mmap_offsets, xdp_statistics, xdp_statistics_v1}; #[cfg(windows)] use windows_sys::Win32::Foundation::BOOL; @@ -963,6 +967,170 @@ pub(crate) fn get_socket_peercred(fd: BorrowedFd<'_>) -> io::Result { getsockopt(fd, c::SOL_SOCKET, c::SO_PEERCRED) } +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_umem_reg(fd: BorrowedFd<'_>, value: XdpUmemReg) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_UMEM_REG, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_umem_fill_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_UMEM_FILL_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_umem_completion_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_UMEM_COMPLETION_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_tx_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_TX_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_rx_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_RX_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn get_xdp_mmap_offsets(fd: BorrowedFd<'_>) -> io::Result { + // The kernel will write `xdp_mmap_offsets` or `xdp_mmap_offsets_v1` to the supplied pointer, + // depending on the kernel version. Both structs only contain u64 values. + // By using the larger of both as the parameter, we can shuffle the values to the non-v1 version + // returned by `get_xdp_mmap_offsets` while keeping the return type unaffected by the kernel + // version. This works because C will layout all struct members one after the other. 
+ + let mut optlen = core::mem::size_of::().try_into().unwrap(); + debug_assert!( + optlen as usize >= core::mem::size_of::(), + "Socket APIs don't ever use `bool` directly" + ); + let mut value = MaybeUninit::::zeroed(); + getsockopt_raw(fd, c::SOL_XDP, c::XDP_MMAP_OFFSETS, &mut value, &mut optlen)?; + + if optlen as usize == core::mem::size_of::() { + // Safety: All members of xdp_mmap_offsets are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xpd_mmap_offsets = unsafe { value.assume_init() }; + Ok(XdpMmapOffsets { + rx: XdpRingOffset { + producer: xpd_mmap_offsets.rx.producer, + consumer: xpd_mmap_offsets.rx.consumer, + desc: xpd_mmap_offsets.rx.desc, + flags: None, + }, + tx: XdpRingOffset { + producer: xpd_mmap_offsets.rx.flags, + consumer: xpd_mmap_offsets.tx.producer, + desc: xpd_mmap_offsets.tx.consumer, + flags: None, + }, + fr: XdpRingOffset { + producer: xpd_mmap_offsets.tx.desc, + consumer: xpd_mmap_offsets.tx.flags, + desc: xpd_mmap_offsets.fr.producer, + flags: None, + }, + cr: XdpRingOffset { + producer: xpd_mmap_offsets.fr.consumer, + consumer: xpd_mmap_offsets.fr.desc, + desc: xpd_mmap_offsets.fr.flags, + flags: None, + }, + }) + } else { + assert_eq!( + optlen as usize, + core::mem::size_of::(), + "unexpected getsockopt size" + ); + // Safety: All members of xdp_mmap_offsets are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xpd_mmap_offsets = unsafe { value.assume_init() }; + Ok(XdpMmapOffsets { + rx: XdpRingOffset { + producer: xpd_mmap_offsets.rx.producer, + consumer: xpd_mmap_offsets.rx.consumer, + desc: xpd_mmap_offsets.rx.desc, + flags: Some(xpd_mmap_offsets.rx.flags), + }, + tx: XdpRingOffset { + producer: xpd_mmap_offsets.tx.producer, + consumer: xpd_mmap_offsets.tx.consumer, + desc: xpd_mmap_offsets.tx.desc, + flags: Some(xpd_mmap_offsets.tx.flags), + }, + fr: XdpRingOffset { + producer: xpd_mmap_offsets.fr.producer, + consumer: xpd_mmap_offsets.fr.consumer, + desc: 
xpd_mmap_offsets.fr.desc, + flags: Some(xpd_mmap_offsets.fr.flags), + }, + cr: XdpRingOffset { + producer: xpd_mmap_offsets.cr.producer, + consumer: xpd_mmap_offsets.cr.consumer, + desc: xpd_mmap_offsets.cr.desc, + flags: Some(xpd_mmap_offsets.cr.flags), + }, + }) + } +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn get_xdp_statistics(fd: BorrowedFd<'_>) -> io::Result { + let mut optlen = core::mem::size_of::().try_into().unwrap(); + debug_assert!( + optlen as usize >= core::mem::size_of::(), + "Socket APIs don't ever use `bool` directly" + ); + let mut value = MaybeUninit::::zeroed(); + getsockopt_raw(fd, c::SOL_XDP, c::XDP_STATISTICS, &mut value, &mut optlen)?; + + if optlen as usize == core::mem::size_of::() { + // Safety: All members of xdp_statistics are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xdp_statistics = unsafe { value.assume_init() }; + Ok(XdpStatistics { + rx_dropped: xdp_statistics.rx_dropped, + rx_invalid_descs: xdp_statistics.rx_invalid_descs, + tx_invalid_descs: xdp_statistics.tx_invalid_descs, + rx_ring_full: None, + rx_fill_ring_empty_descs: None, + tx_ring_empty_descs: None, + }) + } else { + assert_eq!( + optlen as usize, + core::mem::size_of::(), + "unexpected getsockopt size" + ); + // Safety: All members of xdp_statistics are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xdp_statistics = unsafe { value.assume_init() }; + Ok(XdpStatistics { + rx_dropped: xdp_statistics.rx_dropped, + rx_invalid_descs: xdp_statistics.rx_invalid_descs, + tx_invalid_descs: xdp_statistics.tx_invalid_descs, + rx_ring_full: Some(xdp_statistics.rx_ring_full), + rx_fill_ring_empty_descs: Some(xdp_statistics.rx_fill_ring_empty_descs), + tx_ring_empty_descs: Some(xdp_statistics.tx_ring_empty_descs), + }) + } +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn get_xdp_options(fd: BorrowedFd<'_>) -> io::Result { + getsockopt(fd, c::SOL_XDP, c::XDP_OPTIONS) +} + #[inline] fn
to_ip_mreq(multiaddr: &Ipv4Addr, interface: &Ipv4Addr) -> c::ip_mreq { c::ip_mreq { diff --git a/src/backend/libc/net/syscalls.rs b/src/backend/libc/net/syscalls.rs index 48dbf1f90..3fdb7766b 100644 --- a/src/backend/libc/net/syscalls.rs +++ b/src/backend/libc/net/syscalls.rs @@ -2,10 +2,16 @@ #[cfg(unix)] use super::addr::SocketAddrUnix; +#[cfg(target_os = "linux")] +use super::msghdr::with_xdp_msghdr; +#[cfg(target_os = "linux")] +use super::write_sockaddr::encode_sockaddr_xdp; use crate::backend::c; use crate::backend::conv::{borrowed_fd, ret, ret_owned_fd, ret_send_recv, send_recv_len}; use crate::fd::{BorrowedFd, OwnedFd}; use crate::io; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; use crate::net::{SocketAddrAny, SocketAddrV4, SocketAddrV6}; use crate::utils::as_ptr; use core::mem::{size_of, MaybeUninit}; @@ -145,6 +151,25 @@ pub(crate) fn sendto_unix( } } +#[cfg(target_os = "linux")] +pub(crate) fn sendto_xdp( + fd: BorrowedFd<'_>, + buf: &[u8], + flags: SendFlags, + addr: &SocketAddrXdp, +) -> io::Result { + unsafe { + ret_send_recv(c::sendto( + borrowed_fd(fd), + buf.as_ptr().cast(), + send_recv_len(buf.len()), + bitflags_bits!(flags), + as_ptr(&encode_sockaddr_xdp(addr)).cast::(), + size_of::() as _, + )) + } +} + #[cfg(not(any(target_os = "redox", target_os = "wasi")))] pub(crate) fn socket( domain: AddressFamily, @@ -217,6 +242,17 @@ pub(crate) fn bind_unix(sockfd: BorrowedFd<'_>, addr: &SocketAddrUnix) -> io::Re } } +#[cfg(target_os = "linux")] +pub(crate) fn bind_xdp(sockfd: BorrowedFd<'_>, addr: &SocketAddrXdp) -> io::Result<()> { + unsafe { + ret(c::bind( + borrowed_fd(sockfd), + as_ptr(&encode_sockaddr_xdp(addr)).cast(), + size_of::() as c::socklen_t, + )) + } +} + #[cfg(not(any(target_os = "redox", target_os = "wasi")))] pub(crate) fn connect_v4(sockfd: BorrowedFd<'_>, addr: &SocketAddrV4) -> io::Result<()> { unsafe { @@ -402,6 +438,23 @@ pub(crate) fn sendmsg_unix( }) } +#[cfg(target_os = "linux")] +pub(crate) fn sendmsg_xdp( 
+ sockfd: BorrowedFd<'_>, + addr: &SocketAddrXdp, + iov: &[IoSlice<'_>], + control: &mut SendAncillaryBuffer<'_, '_, '_>, + msg_flags: SendFlags, +) -> io::Result { + with_xdp_msghdr(addr, iov, control, |msghdr| unsafe { + ret_send_recv(c::sendmsg( + borrowed_fd(sockfd), + &msghdr, + bitflags_bits!(msg_flags), + )) + }) +} + #[cfg(not(any( apple, windows, diff --git a/src/backend/libc/net/write_sockaddr.rs b/src/backend/libc/net/write_sockaddr.rs index 2eee98cb8..fdc5dbb13 100644 --- a/src/backend/libc/net/write_sockaddr.rs +++ b/src/backend/libc/net/write_sockaddr.rs @@ -6,6 +6,8 @@ use super::addr::SocketAddrStorage; use super::addr::SocketAddrUnix; use super::ext::{in6_addr_new, in_addr_new, sockaddr_in6_new}; use crate::backend::c; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; use crate::net::{SocketAddrAny, SocketAddrV4, SocketAddrV6}; use core::mem::size_of; @@ -18,6 +20,8 @@ pub(crate) unsafe fn write_sockaddr( SocketAddrAny::V6(v6) => write_sockaddr_v6(v6, storage), #[cfg(unix)] SocketAddrAny::Unix(unix) => write_sockaddr_unix(unix, storage), + #[cfg(target_os = "linux")] + SocketAddrAny::Xdp(xdp) => write_sockaddr_xdp(xdp, storage), } } @@ -101,3 +105,21 @@ unsafe fn write_sockaddr_unix(unix: &SocketAddrUnix, storage: *mut SocketAddrSto core::ptr::write(storage.cast(), unix.unix); unix.len() } + +#[cfg(target_os = "linux")] +pub(crate) fn encode_sockaddr_xdp(xdp: &SocketAddrXdp) -> c::sockaddr_xdp { + c::sockaddr_xdp { + sxdp_family: c::AF_XDP as _, + sxdp_flags: xdp.flags().bits(), + sxdp_ifindex: xdp.interface_index(), + sxdp_queue_id: xdp.queue_id(), + sxdp_shared_umem_fd: xdp.shared_umem_fd(), + } +} + +#[cfg(target_os = "linux")] +unsafe fn write_sockaddr_xdp(xdp: &SocketAddrXdp, storage: *mut SocketAddrStorage) -> usize { + let encoded = encode_sockaddr_xdp(xdp); + core::ptr::write(storage.cast(), encoded); + size_of::() +} diff --git a/src/backend/linux_raw/c.rs b/src/backend/linux_raw/c.rs index 54174bb0c..b2cd5bdcb 100644 --- 
a/src/backend/linux_raw/c.rs +++ b/src/backend/linux_raw/c.rs @@ -62,7 +62,7 @@ pub(crate) use linux_raw_sys::{ AF_ASH, AF_ATMPVC, AF_ATMSVC, AF_AX25, AF_BLUETOOTH, AF_BRIDGE, AF_CAN, AF_ECONET, AF_IEEE802154, AF_INET, AF_INET6, AF_IPX, AF_IRDA, AF_ISDN, AF_IUCV, AF_KEY, AF_LLC, AF_NETBEUI, AF_NETLINK, AF_NETROM, AF_PACKET, AF_PHONET, AF_PPPOX, AF_RDS, AF_ROSE, - AF_RXRPC, AF_SECURITY, AF_SNA, AF_TIPC, AF_UNIX, AF_UNSPEC, AF_WANPIPE, AF_X25, + AF_RXRPC, AF_SECURITY, AF_SNA, AF_TIPC, AF_UNIX, AF_UNSPEC, AF_WANPIPE, AF_X25, AF_XDP, IP6T_SO_ORIGINAL_DST, IPPROTO_FRAGMENT, IPPROTO_ICMPV6, IPPROTO_MH, IPPROTO_ROUTING, IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_FREEBIND, IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_RECVTCLASS, IPV6_TCLASS, IPV6_UNICAST_HOPS, IPV6_V6ONLY, @@ -71,14 +71,25 @@ pub(crate) use linux_raw_sys::{ MSG_CMSG_CLOEXEC, MSG_CONFIRM, MSG_DONTROUTE, MSG_DONTWAIT, MSG_EOR, MSG_ERRQUEUE, MSG_MORE, MSG_NOSIGNAL, MSG_OOB, MSG_PEEK, MSG_TRUNC, MSG_WAITALL, SCM_CREDENTIALS, SCM_RIGHTS, SHUT_RD, SHUT_RDWR, SHUT_WR, SOCK_DGRAM, SOCK_RAW, SOCK_RDM, SOCK_SEQPACKET, - SOCK_STREAM, SOL_SOCKET, SO_ACCEPTCONN, SO_BROADCAST, SO_COOKIE, SO_DOMAIN, SO_ERROR, - SO_INCOMING_CPU, SO_KEEPALIVE, SO_LINGER, SO_OOBINLINE, SO_ORIGINAL_DST, SO_PASSCRED, - SO_PROTOCOL, SO_RCVBUF, SO_RCVTIMEO_NEW, SO_RCVTIMEO_NEW as SO_RCVTIMEO, SO_RCVTIMEO_OLD, - SO_REUSEADDR, SO_REUSEPORT, SO_SNDBUF, SO_SNDTIMEO_NEW, SO_SNDTIMEO_NEW as SO_SNDTIMEO, - SO_SNDTIMEO_OLD, SO_TYPE, TCP_CONGESTION, TCP_CORK, TCP_KEEPCNT, TCP_KEEPIDLE, - TCP_KEEPINTVL, TCP_NODELAY, TCP_QUICKACK, TCP_THIN_LINEAR_TIMEOUTS, TCP_USER_TIMEOUT, + SOCK_STREAM, SOL_SOCKET, SOL_XDP, SO_ACCEPTCONN, SO_BROADCAST, SO_COOKIE, SO_DOMAIN, + SO_ERROR, SO_INCOMING_CPU, SO_KEEPALIVE, SO_LINGER, SO_OOBINLINE, SO_ORIGINAL_DST, + SO_PASSCRED, SO_PROTOCOL, SO_RCVBUF, SO_RCVTIMEO_NEW, SO_RCVTIMEO_NEW as SO_RCVTIMEO, + SO_RCVTIMEO_OLD, SO_REUSEADDR, SO_REUSEPORT, SO_SNDBUF, SO_SNDTIMEO_NEW, + SO_SNDTIMEO_NEW as SO_SNDTIMEO, 
SO_SNDTIMEO_OLD, SO_TYPE, TCP_CONGESTION, TCP_CORK, + TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_NODELAY, TCP_QUICKACK, + TCP_THIN_LINEAR_TIMEOUTS, TCP_USER_TIMEOUT, }, netlink::*, + xdp::{ + sockaddr_xdp, xdp_desc, xdp_mmap_offsets, xdp_mmap_offsets_v1, xdp_options, + xdp_ring_offset, xdp_ring_offset_v1, xdp_statistics, xdp_statistics_v1, xdp_umem_reg, + xdp_umem_reg_v1, XDP_COPY, XDP_MMAP_OFFSETS, XDP_OPTIONS, XDP_OPTIONS_ZEROCOPY, + XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_PKT_CONTD, XDP_RING_NEED_WAKEUP, XDP_RX_RING, + XDP_SHARED_UMEM, XDP_STATISTICS, XDP_TX_RING, XDP_UMEM_COMPLETION_RING, XDP_UMEM_FILL_RING, + XDP_UMEM_PGOFF_COMPLETION_RING, XDP_UMEM_PGOFF_FILL_RING, XDP_UMEM_REG, + XDP_UMEM_UNALIGNED_CHUNK_FLAG, XDP_USE_NEED_WAKEUP, XDP_USE_SG, XDP_ZEROCOPY, + XSK_UNALIGNED_BUF_ADDR_MASK, XSK_UNALIGNED_BUF_OFFSET_SHIFT, + }, }; // Cast away bindgen's `enum` type to make these consistent with the other diff --git a/src/backend/linux_raw/net/msghdr.rs b/src/backend/linux_raw/net/msghdr.rs index 2b88bfbbc..3ccce04c9 100644 --- a/src/backend/linux_raw/net/msghdr.rs +++ b/src/backend/linux_raw/net/msghdr.rs @@ -6,9 +6,13 @@ #![allow(unsafe_code)] use crate::backend::c; +#[cfg(target_os = "linux")] +use crate::backend::net::write_sockaddr::encode_sockaddr_xdp; use crate::backend::net::write_sockaddr::{encode_sockaddr_v4, encode_sockaddr_v6}; use crate::io::{self, IoSlice, IoSliceMut}; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; use crate::net::{RecvAncillaryBuffer, SendAncillaryBuffer, SocketAddrV4, SocketAddrV6}; use crate::utils::as_ptr; @@ -132,6 +136,27 @@ pub(crate) fn with_unix_msghdr( }) } +/// Create a message header intended to send with an XDP address. 
+#[cfg(target_os = "linux")] +pub(crate) fn with_xdp_msghdr( + addr: &SocketAddrXdp, + iov: &[IoSlice<'_>], + control: &mut SendAncillaryBuffer<'_, '_, '_>, + f: impl FnOnce(c::msghdr) -> R, +) -> R { + let encoded = encode_sockaddr_xdp(addr); + + f(c::msghdr { + msg_name: as_ptr(&encoded) as _, + msg_namelen: size_of::() as _, + msg_iov: iov.as_ptr() as _, + msg_iovlen: msg_iov_len(iov.len()), + msg_control: control.as_control_ptr().cast(), + msg_controllen: msg_control_len(control.control_len()), + msg_flags: 0, + }) +} + /// Create a zero-initialized message header struct value. pub(crate) fn zero_msghdr() -> c::msghdr { c::msghdr { diff --git a/src/backend/linux_raw/net/read_sockaddr.rs b/src/backend/linux_raw/net/read_sockaddr.rs index 5a91707ef..23e1d641d 100644 --- a/src/backend/linux_raw/net/read_sockaddr.rs +++ b/src/backend/linux_raw/net/read_sockaddr.rs @@ -4,6 +4,8 @@ use crate::backend::c; use crate::io; +#[cfg(target_os = "linux")] +use crate::net::xdp::{SockaddrXdpFlags, SocketAddrXdp}; use crate::net::{Ipv4Addr, Ipv6Addr, SocketAddrAny, SocketAddrUnix, SocketAddrV4, SocketAddrV6}; use core::mem::size_of; use core::slice; @@ -112,6 +114,19 @@ pub(crate) unsafe fn read_sockaddr( Ok(SocketAddrAny::Unix(SocketAddrUnix::new(bytes)?)) } } + #[cfg(target_os = "linux")] + c::AF_XDP => { + if len < size_of::() { + return Err(io::Errno::INVAL); + } + let decode = &*storage.cast::(); + Ok(SocketAddrAny::Xdp(SocketAddrXdp::new( + SockaddrXdpFlags::from_bits_retain(decode.sxdp_flags), + decode.sxdp_ifindex, // host byte order, as written by `encode_sockaddr_xdp` + decode.sxdp_queue_id, + decode.sxdp_shared_umem_fd, + ))) + } _ => Err(io::Errno::NOTSUP), } } @@ -190,6 +205,17 @@ pub(crate) unsafe fn read_sockaddr_os(storage: *const c::sockaddr, len: usize) - SocketAddrAny::Unix(SocketAddrUnix::new(bytes).unwrap()) } } + #[cfg(target_os = "linux")] + c::AF_XDP => { + assert!(len >= size_of::()); + let decode = &*storage.cast::(); + SocketAddrAny::Xdp(SocketAddrXdp::new( +
SockaddrXdpFlags::from_bits_retain(decode.sxdp_flags), + decode.sxdp_ifindex, // host byte order, as written by `encode_sockaddr_xdp` + decode.sxdp_queue_id, + decode.sxdp_shared_umem_fd, + )) + } other => unimplemented!("{:?}", other), } } diff --git a/src/backend/linux_raw/net/sockopt.rs b/src/backend/linux_raw/net/sockopt.rs index 6a740bbf7..d8066032e 100644 --- a/src/backend/linux_raw/net/sockopt.rs +++ b/src/backend/linux_raw/net/sockopt.rs @@ -12,6 +12,8 @@ use crate::fd::BorrowedFd; use crate::ffi::CStr; use crate::io; use crate::net::sockopt::Timeout; +#[cfg(target_os = "linux")] +use crate::net::xdp::{XdpMmapOffsets, XdpOptionsFlags, XdpRingOffset, XdpStatistics, XdpUmemReg}; use crate::net::{ AddressFamily, Ipv4Addr, Ipv6Addr, Protocol, RawProtocol, SocketAddrAny, SocketAddrStorage, SocketAddrV4, SocketAddrV6, SocketType, UCred, @@ -23,6 +25,8 @@ use alloc::string::String; use core::mem::MaybeUninit; use core::time::Duration; use linux_raw_sys::general::{__kernel_old_timeval, __kernel_sock_timeval}; +#[cfg(target_os = "linux")] +use linux_raw_sys::xdp::{xdp_mmap_offsets, xdp_statistics, xdp_statistics_v1}; #[cfg(target_arch = "x86")] use { crate::backend::conv::{slice_just_addr, x86_sys}, @@ -799,6 +803,170 @@ pub(crate) fn get_socket_peercred(fd: BorrowedFd<'_>) -> io::Result { getsockopt(fd, c::SOL_SOCKET, linux_raw_sys::net::SO_PEERCRED) } +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_umem_reg(fd: BorrowedFd<'_>, value: XdpUmemReg) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_UMEM_REG, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_umem_fill_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_UMEM_FILL_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_umem_completion_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_UMEM_COMPLETION_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline]
+pub(crate) fn set_xdp_tx_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_TX_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn set_xdp_rx_ring_size(fd: BorrowedFd<'_>, value: u32) -> io::Result<()> { + setsockopt(fd, c::SOL_XDP, c::XDP_RX_RING, value) +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn get_xdp_mmap_offsets(fd: BorrowedFd<'_>) -> io::Result { + // The kernel will write `xdp_mmap_offsets` or `xdp_mmap_offsets_v1` to the supplied pointer, + // depending on the kernel version. Both structs only contain u64 values. + // By using the larger of both as the parameter, we can shuffle the values to the non-v1 version + // returned by `get_xdp_mmap_offsets` while keeping the return type unaffected by the kernel + // version. This works because C will layout all struct members one after the other. + + let mut optlen = core::mem::size_of::().try_into().unwrap(); + debug_assert!( + optlen as usize >= core::mem::size_of::(), + "Socket APIs don't ever use `bool` directly" + ); + let mut value = MaybeUninit::::zeroed(); + getsockopt_raw(fd, c::SOL_XDP, c::XDP_MMAP_OFFSETS, &mut value, &mut optlen)?; + + if optlen as usize == core::mem::size_of::() { + // Safety: All members of xdp_mmap_offsets are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xpd_mmap_offsets = unsafe { value.assume_init() }; + Ok(XdpMmapOffsets { + rx: XdpRingOffset { + producer: xpd_mmap_offsets.rx.producer, + consumer: xpd_mmap_offsets.rx.consumer, + desc: xpd_mmap_offsets.rx.desc, + flags: None, + }, + tx: XdpRingOffset { + producer: xpd_mmap_offsets.rx.flags, + consumer: xpd_mmap_offsets.tx.producer, + desc: xpd_mmap_offsets.tx.consumer, + flags: None, + }, + fr: XdpRingOffset { + producer: xpd_mmap_offsets.tx.desc, + consumer: xpd_mmap_offsets.tx.flags, + desc: xpd_mmap_offsets.fr.producer, + flags: None, + }, + cr: XdpRingOffset { + producer: xpd_mmap_offsets.fr.consumer, + 
consumer: xpd_mmap_offsets.fr.desc, + desc: xpd_mmap_offsets.fr.flags, + flags: None, + }, + }) + } else { + assert_eq!( + optlen as usize, + core::mem::size_of::(), + "unexpected getsockopt size" + ); + // Safety: All members of xdp_mmap_offsets are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xpd_mmap_offsets = unsafe { value.assume_init() }; + Ok(XdpMmapOffsets { + rx: XdpRingOffset { + producer: xpd_mmap_offsets.rx.producer, + consumer: xpd_mmap_offsets.rx.consumer, + desc: xpd_mmap_offsets.rx.desc, + flags: Some(xpd_mmap_offsets.rx.flags), + }, + tx: XdpRingOffset { + producer: xpd_mmap_offsets.tx.producer, + consumer: xpd_mmap_offsets.tx.consumer, + desc: xpd_mmap_offsets.tx.desc, + flags: Some(xpd_mmap_offsets.tx.flags), + }, + fr: XdpRingOffset { + producer: xpd_mmap_offsets.fr.producer, + consumer: xpd_mmap_offsets.fr.consumer, + desc: xpd_mmap_offsets.fr.desc, + flags: Some(xpd_mmap_offsets.fr.flags), + }, + cr: XdpRingOffset { + producer: xpd_mmap_offsets.cr.producer, + consumer: xpd_mmap_offsets.cr.consumer, + desc: xpd_mmap_offsets.cr.desc, + flags: Some(xpd_mmap_offsets.cr.flags), + }, + }) + } +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn get_xdp_statistics(fd: BorrowedFd<'_>) -> io::Result { + let mut optlen = core::mem::size_of::().try_into().unwrap(); + debug_assert!( + optlen as usize >= core::mem::size_of::(), + "Socket APIs don't ever use `bool` directly" + ); + let mut value = MaybeUninit::::zeroed(); + getsockopt_raw(fd, c::SOL_XDP, c::XDP_STATISTICS, &mut value, &mut optlen)?; + + if optlen as usize == core::mem::size_of::() { + // Safety: All members of xdp_statistics are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xdp_statistics = unsafe { value.assume_init() }; + Ok(XdpStatistics { + rx_dropped: xdp_statistics.rx_dropped, + rx_invalid_descs: xdp_statistics.rx_invalid_descs, + tx_invalid_descs: xdp_statistics.tx_invalid_descs, + rx_ring_full: None, +
rx_fill_ring_empty_descs: None, + tx_ring_empty_descs: None, + }) + } else { + assert_eq!( + optlen as usize, + core::mem::size_of::(), + "unexpected getsockopt size" + ); + // Safety: All members of xdp_statistics are u64 and thus are correctly initialized + // by `MaybeUninit::::zeroed()` + let xdp_statistics = unsafe { value.assume_init() }; + Ok(XdpStatistics { + rx_dropped: xdp_statistics.rx_dropped, + rx_invalid_descs: xdp_statistics.rx_invalid_descs, + tx_invalid_descs: xdp_statistics.tx_invalid_descs, + rx_ring_full: Some(xdp_statistics.rx_ring_full), + rx_fill_ring_empty_descs: Some(xdp_statistics.rx_fill_ring_empty_descs), + tx_ring_empty_descs: Some(xdp_statistics.tx_ring_empty_descs), + }) + } +} + +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn get_xdp_options(fd: BorrowedFd<'_>) -> io::Result { + getsockopt(fd, c::SOL_XDP, c::XDP_OPTIONS) +} + #[inline] fn to_ip_mreq(multiaddr: &Ipv4Addr, interface: &Ipv4Addr) -> c::ip_mreq { c::ip_mreq { diff --git a/src/backend/linux_raw/net/syscalls.rs b/src/backend/linux_raw/net/syscalls.rs index 726f022f8..4d4427a40 100644 --- a/src/backend/linux_raw/net/syscalls.rs +++ b/src/backend/linux_raw/net/syscalls.rs @@ -5,11 +5,15 @@ //! See the `rustix::backend` module documentation for details. 
#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)] +#[cfg(target_os = "linux")] +use super::msghdr::with_xdp_msghdr; use super::msghdr::{ with_noaddr_msghdr, with_recv_msghdr, with_unix_msghdr, with_v4_msghdr, with_v6_msghdr, }; use super::read_sockaddr::{initialize_family_to_unspec, maybe_read_sockaddr_os, read_sockaddr_os}; use super::send_recv::{RecvFlags, SendFlags}; +#[cfg(target_os = "linux")] +use super::write_sockaddr::encode_sockaddr_xdp; use super::write_sockaddr::{encode_sockaddr_v4, encode_sockaddr_v6}; use crate::backend::c; use crate::backend::conv::{ @@ -18,6 +22,8 @@ use crate::backend::conv::{ }; use crate::fd::{BorrowedFd, OwnedFd}; use crate::io::{self, IoSlice, IoSliceMut}; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; use crate::net::{ AddressFamily, Protocol, RecvAncillaryBuffer, RecvMsgReturn, SendAncillaryBuffer, Shutdown, SocketAddrAny, SocketAddrUnix, SocketAddrV4, SocketAddrV6, SocketFlags, SocketType, @@ -402,6 +408,37 @@ pub(crate) fn sendmsg_unix( }) } +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn sendmsg_xdp( + sockfd: BorrowedFd<'_>, + addr: &SocketAddrXdp, + iov: &[IoSlice<'_>], + control: &mut SendAncillaryBuffer<'_, '_, '_>, + msg_flags: SendFlags, +) -> io::Result { + with_xdp_msghdr(addr, iov, control, |msghdr| { + #[cfg(not(target_arch = "x86"))] + let result = + unsafe { ret_usize(syscall!(__NR_sendmsg, sockfd, by_ref(&msghdr), msg_flags)) }; + + #[cfg(target_arch = "x86")] + let result = unsafe { + ret_usize(syscall!( + __NR_socketcall, + x86_sys(SYS_SENDMSG), + slice_just_addr::, _>(&[ + sockfd.into(), + by_ref(&msghdr), + msg_flags.into() + ]) + )) + }; + + result + }) +} + #[inline] pub(crate) fn shutdown(fd: BorrowedFd<'_>, how: Shutdown) -> io::Result<()> { #[cfg(not(target_arch = "x86"))] @@ -584,6 +621,45 @@ pub(crate) fn sendto_unix( } } +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn sendto_xdp( + fd: BorrowedFd<'_>, + buf: &[u8], + flags: SendFlags, + addr: &SocketAddrXdp, 
+) -> io::Result { + let (buf_addr, buf_len) = slice(buf); + + #[cfg(not(target_arch = "x86"))] + unsafe { + ret_usize(syscall_readonly!( + __NR_sendto, + fd, + buf_addr, + buf_len, + flags, + by_ref(&encode_sockaddr_xdp(addr)), + size_of::() + )) + } + #[cfg(target_arch = "x86")] + unsafe { + ret_usize(syscall_readonly!( + __NR_socketcall, + x86_sys(SYS_SENDTO), + slice_just_addr::, _>(&[ + fd.into(), + buf_addr, + buf_len, + flags.into(), + by_ref(&encode_sockaddr_xdp(addr)), + size_of::(), + ]) + )) + } +} + #[inline] pub(crate) unsafe fn recv( fd: BorrowedFd<'_>, @@ -829,6 +905,32 @@ pub(crate) fn bind_unix(fd: BorrowedFd<'_>, addr: &SocketAddrUnix) -> io::Result } } +#[cfg(target_os = "linux")] +#[inline] +pub(crate) fn bind_xdp(fd: BorrowedFd<'_>, addr: &SocketAddrXdp) -> io::Result<()> { + #[cfg(not(target_arch = "x86"))] + unsafe { + ret(syscall_readonly!( + __NR_bind, + fd, + by_ref(&encode_sockaddr_xdp(addr)), + size_of::() + )) + } + #[cfg(target_arch = "x86")] + unsafe { + ret(syscall_readonly!( + __NR_socketcall, + x86_sys(SYS_BIND), + slice_just_addr::, _>(&[ + fd.into(), + by_ref(&encode_sockaddr_xdp(addr)), + size_of::(), + ]) + )) + } +} + #[inline] pub(crate) fn connect_v4(fd: BorrowedFd<'_>, addr: &SocketAddrV4) -> io::Result<()> { #[cfg(not(target_arch = "x86"))] diff --git a/src/backend/linux_raw/net/write_sockaddr.rs b/src/backend/linux_raw/net/write_sockaddr.rs index 24edd4948..fb6e51edb 100644 --- a/src/backend/linux_raw/net/write_sockaddr.rs +++ b/src/backend/linux_raw/net/write_sockaddr.rs @@ -3,6 +3,8 @@ #![allow(unsafe_code)] use crate::backend::c; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; use crate::net::{SocketAddrAny, SocketAddrStorage, SocketAddrUnix, SocketAddrV4, SocketAddrV6}; use core::mem::size_of; @@ -14,6 +16,8 @@ pub(crate) unsafe fn write_sockaddr( SocketAddrAny::V4(v4) => write_sockaddr_v4(v4, storage), SocketAddrAny::V6(v6) => write_sockaddr_v6(v6, storage), SocketAddrAny::Unix(unix) => 
write_sockaddr_unix(unix, storage), + #[cfg(target_os = "linux")] + SocketAddrAny::Xdp(xdp) => write_sockaddr_xdp(xdp, storage), } } @@ -58,3 +62,21 @@ unsafe fn write_sockaddr_unix(unix: &SocketAddrUnix, storage: *mut SocketAddrSto core::ptr::write(storage.cast(), unix.unix); unix.len() } + +#[cfg(target_os = "linux")] +pub(crate) fn encode_sockaddr_xdp(xdp: &SocketAddrXdp) -> c::sockaddr_xdp { + c::sockaddr_xdp { + sxdp_family: c::AF_XDP as _, + sxdp_flags: xdp.flags().bits(), + sxdp_ifindex: xdp.interface_index(), + sxdp_queue_id: xdp.queue_id(), + sxdp_shared_umem_fd: xdp.shared_umem_fd(), + } +} + +#[cfg(target_os = "linux")] +unsafe fn write_sockaddr_xdp(xdp: &SocketAddrXdp, storage: *mut SocketAddrStorage) -> usize { + let encoded = encode_sockaddr_xdp(xdp); + core::ptr::write(storage.cast(), encoded); + size_of::() +} diff --git a/src/net/send_recv/mod.rs b/src/net/send_recv/mod.rs index cad2d5c6c..1ae4fdb39 100644 --- a/src/net/send_recv/mod.rs +++ b/src/net/send_recv/mod.rs @@ -3,6 +3,8 @@ #![allow(unsafe_code)] use crate::buffer::split_init; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; #[cfg(unix)] use crate::net::SocketAddrUnix; use crate::net::{SocketAddr, SocketAddrAny, SocketAddrV4, SocketAddrV6}; @@ -261,6 +263,8 @@ fn _sendto_any( SocketAddrAny::V6(v6) => backend::net::syscalls::sendto_v6(fd, buf, flags, v6), #[cfg(unix)] SocketAddrAny::Unix(unix) => backend::net::syscalls::sendto_unix(fd, buf, flags, unix), + #[cfg(target_os = "linux")] + SocketAddrAny::Xdp(xdp) => backend::net::syscalls::sendto_xdp(fd, buf, flags, xdp), } } @@ -378,3 +382,22 @@ pub fn sendto_unix( ) -> io::Result { backend::net::syscalls::sendto_unix(fd.as_fd(), buf, flags, addr) } + +/// `sendto(fd, buf, flags, addr, sizeof(struct sockaddr_xdp))`—Writes data +/// to a socket to a specific XDP address. 
+/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/sendto.2.html +#[cfg(target_os = "linux")] +#[inline] +#[doc(alias = "sendto")] +pub fn sendto_xdp( + fd: Fd, + buf: &[u8], + flags: SendFlags, + addr: &SocketAddrXdp, +) -> io::Result { + backend::net::syscalls::sendto_xdp(fd.as_fd(), buf, flags, addr) +} diff --git a/src/net/send_recv/msg.rs b/src/net/send_recv/msg.rs index 78fb8654c..629e4656a 100644 --- a/src/net/send_recv/msg.rs +++ b/src/net/send_recv/msg.rs @@ -708,6 +708,24 @@ pub fn sendmsg_unix( backend::net::syscalls::sendmsg_unix(socket.as_fd(), addr, iov, control, flags) } +/// `sendmsg(msghdr)`—Sends a message on a socket to a specific XDP address. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/sendmsg.2.html +#[inline] +#[cfg(target_os = "linux")] +pub fn sendmsg_xdp( + socket: impl AsFd, + addr: &super::SocketAddrXdp, + iov: &[IoSlice<'_>], + control: &mut SendAncillaryBuffer<'_, '_, '_>, + flags: SendFlags, +) -> io::Result { + backend::net::syscalls::sendmsg_xdp(socket.as_fd(), addr, iov, control, flags) +} + /// `sendmsg(msghdr)`—Sends a message on a socket to a specific address. 
/// /// # References @@ -748,6 +766,10 @@ pub fn sendmsg_any( Some(SocketAddrAny::Unix(addr)) => { backend::net::syscalls::sendmsg_unix(socket.as_fd(), addr, iov, control, flags) } + #[cfg(target_os = "linux")] + Some(SocketAddrAny::Xdp(addr)) => { + backend::net::syscalls::sendmsg_xdp(socket.as_fd(), addr, iov, control, flags) + } } } diff --git a/src/net/socket.rs b/src/net/socket.rs index c01b7a43a..bf1aa7c9f 100644 --- a/src/net/socket.rs +++ b/src/net/socket.rs @@ -3,6 +3,8 @@ use crate::net::{SocketAddr, SocketAddrAny, SocketAddrV4, SocketAddrV6}; use crate::{backend, io}; use backend::fd::{AsFd, BorrowedFd}; +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; pub use crate::net::{AddressFamily, Protocol, Shutdown, SocketFlags, SocketType}; #[cfg(unix)] pub use backend::net::addr::SocketAddrUnix; @@ -168,6 +170,8 @@ fn _bind_any(sockfd: BorrowedFd<'_>, addr: &SocketAddrAny) -> io::Result<()> { SocketAddrAny::V6(v6) => backend::net::syscalls::bind_v6(sockfd, v6), #[cfg(unix)] SocketAddrAny::Unix(unix) => backend::net::syscalls::bind_unix(sockfd, unix), + #[cfg(target_os = "linux")] + SocketAddrAny::Xdp(xdp) => backend::net::syscalls::bind_xdp(sockfd, xdp), } } @@ -271,6 +275,19 @@ pub fn bind_unix(sockfd: Fd, addr: &SocketAddrUnix) -> io::Result<()> backend::net::syscalls::bind_unix(sockfd.as_fd(), addr) } +/// `bind(sockfd, addr, sizeof(struct sockaddr_xdp))`—Binds a socket to an XDP address. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/bind.2.html +#[cfg(target_os = "linux")] +#[inline] +#[doc(alias = "bind")] +pub fn bind_xdp(sockfd: Fd, addr: &SocketAddrXdp) -> io::Result<()> { + backend::net::syscalls::bind_xdp(sockfd.as_fd(), addr) +} + /// `connect(sockfd, addr)`—Initiates a connection to an IP address. 
/// /// On Windows, a non-blocking socket returns [`Errno::WOULDBLOCK`] if the @@ -350,6 +367,8 @@ fn _connect_any(sockfd: BorrowedFd<'_>, addr: &SocketAddrAny) -> io::Result<()> SocketAddrAny::V6(v6) => backend::net::syscalls::connect_v6(sockfd, v6), #[cfg(unix)] SocketAddrAny::Unix(unix) => backend::net::syscalls::connect_unix(sockfd, unix), + #[cfg(target_os = "linux")] + SocketAddrAny::Xdp(_) => Err(io::Errno::OPNOTSUPP), } } diff --git a/src/net/socket_addr_any.rs b/src/net/socket_addr_any.rs index a649015f4..3be80a3ad 100644 --- a/src/net/socket_addr_any.rs +++ b/src/net/socket_addr_any.rs @@ -9,6 +9,8 @@ //! OS-specific socket address representations in memory. #![allow(unsafe_code)] +#[cfg(target_os = "linux")] +use crate::net::xdp::SocketAddrXdp; #[cfg(unix)] use crate::net::SocketAddrUnix; use crate::net::{AddressFamily, SocketAddr, SocketAddrV4, SocketAddrV6}; @@ -30,6 +32,9 @@ pub enum SocketAddrAny { /// `struct sockaddr_un` #[cfg(unix)] Unix(SocketAddrUnix), + /// `struct sockaddr_xdp` + #[cfg(target_os = "linux")] + Xdp(SocketAddrXdp), } impl From for SocketAddrAny { @@ -73,6 +78,8 @@ impl SocketAddrAny { Self::V6(_) => AddressFamily::INET6, #[cfg(unix)] Self::Unix(_) => AddressFamily::UNIX, + #[cfg(target_os = "linux")] + Self::Xdp(_) => AddressFamily::XDP, } } @@ -108,6 +115,8 @@ impl fmt::Debug for SocketAddrAny { Self::V6(v6) => v6.fmt(fmt), #[cfg(unix)] Self::Unix(unix) => unix.fmt(fmt), + #[cfg(target_os = "linux")] + Self::Xdp(xdp) => xdp.fmt(fmt), } } } diff --git a/src/net/sockopt.rs b/src/net/sockopt.rs index df04c4aab..47ab1a5be 100644 --- a/src/net/sockopt.rs +++ b/src/net/sockopt.rs @@ -143,6 +143,8 @@ #![doc(alias = "getsockopt")] #![doc(alias = "setsockopt")] +#[cfg(target_os = "linux")] +use crate::net::xdp::{XdpMmapOffsets, XdpOptionsFlags, XdpStatistics, XdpUmemReg}; #[cfg(not(any( apple, windows, @@ -1372,6 +1374,104 @@ pub fn get_socket_peercred(fd: Fd) -> io::Result { backend::net::sockopt::get_socket_peercred(fd.as_fd()) } +/// 
`setsockopt(fd, SOL_XDP, XDP_UMEM_REG, value)` +/// +/// On kernel versions only supporting v1, the flags are ignored. +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-umem-reg-setsockopt +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_UMEM_REG")] +pub fn set_xdp_umem_reg(fd: Fd, value: XdpUmemReg) -> io::Result<()> { + backend::net::sockopt::set_xdp_umem_reg(fd.as_fd(), value) +} + +/// `setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, value)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-rx-tx-umem-fill-umem-completion-ring-setsockopts +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_UMEM_FILL_RING")] +pub fn set_xdp_umem_fill_ring_size(fd: Fd, value: u32) -> io::Result<()> { + backend::net::sockopt::set_xdp_umem_fill_ring_size(fd.as_fd(), value) +} + +/// `setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, value)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-rx-tx-umem-fill-umem-completion-ring-setsockopts +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_UMEM_COMPLETION_RING")] +pub fn set_xdp_umem_completion_ring_size(fd: Fd, value: u32) -> io::Result<()> { + backend::net::sockopt::set_xdp_umem_completion_ring_size(fd.as_fd(), value) +} + +/// `setsockopt(fd, SOL_XDP, XDP_TX_RING, value)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-rx-tx-umem-fill-umem-completion-ring-setsockopts +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_TX_RING")] +pub fn set_xdp_tx_ring_size(fd: Fd, value: u32) -> io::Result<()> { + backend::net::sockopt::set_xdp_tx_ring_size(fd.as_fd(), value) +} + +/// `setsockopt(fd, SOL_XDP, XDP_RX_RING, value)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: 
https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-rx-tx-umem-fill-umem-completion-ring-setsockopts +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_RX_RING")] +pub fn set_xdp_rx_ring_size(fd: Fd, value: u32) -> io::Result<()> { + backend::net::sockopt::set_xdp_rx_ring_size(fd.as_fd(), value) +} + +/// `getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_MMAP_OFFSETS")] +pub fn get_xdp_mmap_offsets(fd: Fd) -> io::Result { + backend::net::sockopt::get_xdp_mmap_offsets(fd.as_fd()) +} + +/// `getsockopt(fd, SOL_XDP, XDP_STATISTICS)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-statistics-getsockopt +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_STATISTICS")] +pub fn get_xdp_statistics(fd: Fd) -> io::Result { + backend::net::sockopt::get_xdp_statistics(fd.as_fd()) +} + +/// `getsockopt(fd, SOL_XDP, XDP_OPTIONS)` +/// +/// # References +/// - [Linux] +/// +/// [Linux]: https://www.kernel.org/doc/html/next/networking/af_xdp.html#xdp-options-getsockopt +#[cfg(target_os = "linux")] +#[doc(alias = "XDP_OPTIONS")] +pub fn get_xdp_options(fd: Fd) -> io::Result { + backend::net::sockopt::get_xdp_options(fd.as_fd()) +} + #[test] fn test_sizes() { use c::c_int; diff --git a/src/net/types.rs b/src/net/types.rs index ad60e36cb..dc0b752ba 100644 --- a/src/net/types.rs +++ b/src/net/types.rs @@ -588,6 +588,9 @@ impl AddressFamily { /// `AF_VSOCK` #[cfg(any(apple, target_os = "emscripten", target_os = "fuchsia"))] pub const VSOCK: Self = Self(c::AF_VSOCK as _); + /// `AF_XDP` + #[cfg(target_os = "linux")] + pub const XDP: Self = Self(c::AF_XDP as _); /// Constructs a `AddressFamily` from a raw integer. #[inline] @@ -1445,6 +1448,288 @@ bitflags! { } } +/// `AF_XDP` related types and constants. 
+#[cfg(target_os = "linux")] +pub mod xdp { + use super::{bitflags, c}; + + bitflags! { + /// `XDP_OPTIONS_*` constants returned by [`get_xdp_options`]. + /// + /// [`get_xdp_options`]: crate::net::sockopt::get_xdp_options + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpOptionsFlags: u32 { + /// `XDP_OPTIONS_ZEROCOPY` + const XDP_OPTIONS_ZEROCOPY = bitcast!(c::XDP_OPTIONS_ZEROCOPY); + } + } + + // The constants need to be cast because bindgen generates them as u32 but the struct field is a u16. + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L15-L44 + bitflags! { + /// `XDP_*` constants for use in [`SocketAddrXdp`]. + #[repr(transparent)] + #[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Debug)] + pub struct SockaddrXdpFlags: u16 { + /// `XDP_SHARED_UMEM` + const XDP_SHARED_UMEM = bitcast!(c::XDP_SHARED_UMEM as u16); + /// `XDP_COPY` + const XDP_COPY = bitcast!(c::XDP_COPY as u16); + /// `XDP_ZEROCOPY` + const XDP_ZEROCOPY = bitcast!(c::XDP_ZEROCOPY as u16); + /// `XDP_USE_NEED_WAKEUP` + const XDP_USE_NEED_WAKEUP = bitcast!(c::XDP_USE_NEED_WAKEUP as u16); + // requires kernel 6.6 + /// `XDP_USE_SG` + const XDP_USE_SG = bitcast!(c::XDP_USE_SG as u16); + } + } + + bitflags! { + /// `XDP_RING_*` constants for use in fill and/or Tx ring. + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpRingFlags: u32 { + /// `XDP_RING_NEED_WAKEUP` + const XDP_RING_NEED_WAKEUP = bitcast!(c::XDP_RING_NEED_WAKEUP); + } + } + + bitflags! { + /// `XDP_UMEM_*` constants for use in [`XdpUmemReg`]. + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpUmemRegFlags: u32 { + /// `XDP_UMEM_UNALIGNED_CHUNK_FLAG` + const XDP_UMEM_UNALIGNED_CHUNK_FLAG = bitcast!(c::XDP_UMEM_UNALIGNED_CHUNK_FLAG); + } + } + + /// An XDP socket address. + /// + /// Used to bind to an XDP socket. 
+ /// + /// Not ABI compatible with `struct sockaddr_xdp` + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L38-L44 + #[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Debug)] + pub struct SocketAddrXdp { + /// Flags. + sxdp_flags: SockaddrXdpFlags, + /// Interface index. + sxdp_ifindex: u32, + /// Queue ID. + sxdp_queue_id: u32, + /// Shared UMEM file descriptor. + sxdp_shared_umem_fd: u32, + } + + impl SocketAddrXdp { + /// Construct a new XDP address. + #[inline] + pub fn new( + flags: SockaddrXdpFlags, + interface_index: u32, + queue_id: u32, + share_umem_fd: u32, + ) -> Self { + Self { + sxdp_flags: flags, + sxdp_ifindex: interface_index, + sxdp_queue_id: queue_id, + sxdp_shared_umem_fd: share_umem_fd, + } + } + + /// Return flags. + #[inline] + pub fn flags(&self) -> SockaddrXdpFlags { + self.sxdp_flags + } + + /// Set flags. + #[inline] + pub fn set_flags(&mut self, flags: SockaddrXdpFlags) { + self.sxdp_flags = flags; + } + + /// Return interface index. + #[inline] + pub fn interface_index(&self) -> u32 { + self.sxdp_ifindex + } + + /// Set interface index. + #[inline] + pub fn set_interface_index(&mut self, interface_index: u32) { + self.sxdp_ifindex = interface_index; + } + + /// Return queue ID. + #[inline] + pub fn queue_id(&self) -> u32 { + self.sxdp_queue_id + } + + /// Set queue ID. + #[inline] + pub fn set_queue_id(&mut self, queue_id: u32) { + self.sxdp_queue_id = queue_id; + } + + /// Return shared UMEM file descriptor. + #[inline] + pub fn shared_umem_fd(&self) -> u32 { + self.sxdp_shared_umem_fd + } + + /// Set shared UMEM file descriptor. + #[inline] + pub fn set_shared_umem_fd(&mut self, shared_umem_fd: u32) { + self.sxdp_shared_umem_fd = shared_umem_fd; + } + } + + /// XDP ring offset. + /// + /// Used to mmap rings from kernel. + /// + /// Not ABI compatible with `struct xdp_ring_offset`. 
+ // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L49-L54 + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpRingOffset { + /// Producer offset. + pub producer: u64, + /// Consumer offset. + pub consumer: u64, + /// Descriptors offset. + pub desc: u64, + /// Flags offset. + /// + /// Is `None` if the kernel version (<5.4) does not yet support flags. + pub flags: Option, + } + + /// XDP mmap offsets. + /// + /// Not ABI compatible with `struct xdp_mmap_offsets` + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L56-L61 + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpMmapOffsets { + /// Rx ring offsets. + pub rx: XdpRingOffset, + /// Tx ring offsets. + pub tx: XdpRingOffset, + /// Fill ring offsets. + pub fr: XdpRingOffset, + /// Completion ring offsets. + pub cr: XdpRingOffset, + } + + /// XDP umem registration. + /// + /// `struct xdp_umem_reg` + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L73-L79 + #[repr(C)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpUmemReg { + /// Start address of UMEM. + pub addr: u64, + /// Umem length in bytes. + pub len: u64, + /// Chunk size in bytes. + pub chunk_size: u32, + /// Headroom in bytes. + pub headroom: u32, + /// Flags. + /// + /// Requires kernel version 5.4. + pub flags: XdpUmemRegFlags, + } + + /// XDP statistics. + /// + /// Not ABI compatible with `struct xdp_statistics` + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L81-L88 + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpStatistics { + /// Rx dropped. + pub rx_dropped: u64, + /// Rx invalid descriptors. + pub rx_invalid_descs: u64, + /// Tx invalid descriptors. + pub tx_invalid_descs: u64, + /// Rx ring full. + /// + /// Is `None` if the kernel version (<5.9) does not yet support this field. + pub rx_ring_full: Option, + /// Rx fill ring empty descriptors. 
+ /// + /// Is `None` if the kernel version (<5.9) does not yet support this field. + pub rx_fill_ring_empty_descs: Option, + /// Tx ring empty descriptors. + /// + /// Is `None` if the kernel version (<5.9) does not yet support this field. + pub tx_ring_empty_descs: Option, + } + + /// XDP options. + /// + /// Requires kernel version 5.3. + /// `struct xdp_options` + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L90-L92 + #[repr(C)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpOptions { + /// Flags. + pub flags: XdpOptionsFlags, + } + + /// XDP rx/tx frame descriptor. + /// + /// `struct xdp_desc` + // https://github.com/torvalds/linux/blob/v6.6/include/uapi/linux/if_xdp.h#L109-L113 + #[repr(C)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + pub struct XdpDesc { + /// Offset from the start of the UMEM. + pub addr: u64, + /// Length of packet in bytes. + pub len: u32, + /// Options. + pub options: XdpDescOptions, + } + + #[cfg(target_os = "linux")] + bitflags! { + #[repr(transparent)] + #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] + /// `XDP_*` constants for use in [`XdpDesc`]. + /// + /// Requires kernel version 6.6. + pub struct XdpDescOptions: u32 { + /// `XDP_PKT_CONTD` + const XDP_PKT_CONTD = bitcast!(c::XDP_PKT_CONTD); + } + } + + /// Offset for mmapping rx ring. + pub const XDP_PGOFF_RX_RING: u64 = c::XDP_PGOFF_RX_RING as u64; + /// Offset for mmapping tx ring. + pub const XDP_PGOFF_TX_RING: u64 = c::XDP_PGOFF_TX_RING as u64; + /// Offset for mmapping fill ring. + pub const XDP_UMEM_PGOFF_FILL_RING: u64 = c::XDP_UMEM_PGOFF_FILL_RING; + /// Offset for mmapping completion ring. + pub const XDP_UMEM_PGOFF_COMPLETION_RING: u64 = c::XDP_UMEM_PGOFF_COMPLETION_RING; + + /// Offset used to shift the [`XdpDesc`] addr to the right to extract the address offset in + /// unaligned mode. 
+ pub const XSK_UNALIGNED_BUF_OFFSET_SHIFT: u64 = c::XSK_UNALIGNED_BUF_OFFSET_SHIFT as u64; + /// Mask used to binary `and` the [`XdpDesc`] addr to extract the address without the offset + /// carried in the upper 16 bits of the address in unaligned mode. + pub const XSK_UNALIGNED_BUF_ADDR_MASK: u64 = c::XSK_UNALIGNED_BUF_ADDR_MASK; +} + /// UNIX credentials of socket peer, for use with [`get_socket_peercred`] /// [`SendAncillaryMessage::ScmCredentials`] and /// [`RecvAncillaryMessage::ScmCredentials`]. @@ -1466,6 +1751,7 @@ pub struct UCred { #[test] fn test_sizes() { + use crate::backend::c; use c::c_int; use core::mem::transmute; @@ -1492,4 +1778,11 @@ fn test_sizes() { #[cfg(linux_kernel)] assert_eq_size!(UCred, libc::ucred); + + #[cfg(target_os = "linux")] + assert_eq_size!(super::xdp::XdpUmemReg, c::xdp_umem_reg); + #[cfg(target_os = "linux")] + assert_eq_size!(super::xdp::XdpOptions, c::xdp_options); + #[cfg(target_os = "linux")] + assert_eq_size!(super::xdp::XdpDesc, c::xdp_desc); }