Skip to content

Commit

Permalink
Don't unwrap when removing invalid TCP state from fastpath. (#621)
Browse files Browse the repository at this point in the history
Now that the fastpath temporarily drops (and reacquires) the port lock
when TCP state needs to be invalidated, we opened ourselves up to the
possibility that another packet could have removed this state ahead of
us. Equally, a packet could insert new TCP state which we might
accidentally remove.

This PR removes the unwrap on removal to account for the race, and only
removes TCP flows if they are pointer-equal.

Closes #618, closes #624.
  • Loading branch information
FelixMcFelix authored Dec 3, 2024
1 parent d2334e9 commit b56afee
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 5 deletions.
17 changes: 14 additions & 3 deletions lib/opte-ioctl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ where

// It would be a shame if the command failed and we didn't have enough bytes
// to serialize the error response, so we set this to default to 16 KiB.
let mut resp_buf = Vec::with_capacity(16 * 1024);
const BASE_CAPACITY: usize = 16 * 1024;
let mut resp_buf = Vec::with_capacity(BASE_CAPACITY);
let mut rioctl = OpteCmdIoctl {
api_version: API_VERSION,
cmd,
Expand Down Expand Up @@ -313,8 +314,18 @@ where
if raw_err == libc::ENOBUFS {
assert!(rioctl.resp_len_actual != 0);
assert!(rioctl.resp_len_actual > resp_buf.capacity());
// Make room for the size the kernel claims to need
resp_buf.reserve(rioctl.resp_len_actual - resp_buf.capacity());

// Make room for at least the size the kernel claims to need.
// This can be slightly tricky: since every retry reruns the
// command, the size of the next resp could change from under
// us (increase or decrease). Keep some headroom to account
// for this.
let wanted_capacity =
BASE_CAPACITY / 4 + rioctl.resp_len_actual;

// XDE could write into `resp_buf` (but does not).
// .len() *should* be zero -- but don't bank on it.
resp_buf.reserve(wanted_capacity - resp_buf.len());
rioctl.resp_bytes = resp_buf.as_mut_ptr();
rioctl.resp_len = resp_buf.capacity();
rioctl.resp_len_actual = 0;
Expand Down
14 changes: 12 additions & 2 deletions lib/opte/src/engine/port.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,9 +1395,19 @@ impl<N: NetworkImpl> Port<N> {
let ufid_out = &flow_lock.outbound_ufid;

let ufid_in = flow_lock.inbound_ufid.as_ref();
self.uft_tcp_closed(&mut local_lock, ufid_out, ufid_in);

let _ = local_lock.tcp_flows.remove(ufid_out).unwrap();
// Because we've dropped the port lock, another packet could have also
// invalidated this flow and removed the entry. It could even install
// new UFT/TCP entries, depending on lock/process ordering.
//
// Verify that the state we want to remove still exists, and is
// `Arc`-identical.
if let Some(found_entry) = local_lock.tcp_flows.get(ufid_out) {
if Arc::ptr_eq(found_entry, &entry) {
self.uft_tcp_closed(&mut local_lock, ufid_out, ufid_in);
_ = local_lock.tcp_flows.remove(ufid_out);
}
}

if reprocess {
lock = Some(local_lock);
Expand Down

0 comments on commit b56afee

Please sign in to comment.