diff --git a/.changelog/3023.bugfix.md b/.changelog/3023.bugfix.md new file mode 100644 index 00000000000..b77ebc6c6b9 --- /dev/null +++ b/.changelog/3023.bugfix.md @@ -0,0 +1 @@ +runtime/dispatcher: Break recv loop on abort request diff --git a/go/runtime/host/tests/tester.go b/go/runtime/host/tests/tester.go index bdd564913af..9d8bce6472b 100644 --- a/go/runtime/host/tests/tester.go +++ b/go/runtime/host/tests/tester.go @@ -19,9 +19,14 @@ import ( "github.com/oasisprotocol/oasis-core/go/runtime/host/protocol" ) -// This needs to be large as some runtimes can take a long time to initialize due to remote -// attestation taking a long time. -const recvTimeout = 120 * time.Second +const ( + // This needs to be large as some runtimes can take a long time to initialize due to remote + // attestation taking a long time. + recvTimeout = 120 * time.Second + + // Runtime is already started at this point so we can have a smaller timeout than above. + recvAbortTimeout = 10 * time.Second +) // mockMessageHandler is a mock message handler which only implements a small subset of methods. type mockMessageHandler struct{} @@ -192,7 +197,21 @@ func testRestart(t *testing.T, cfg host.Config, p host.Provisioner) { select { case ev := <-evCh: require.Nil(ev.Stopped, "unexpected stop event") - case <-time.After(recvTimeout): + case <-time.After(recvAbortTimeout): } + // Trigger another non-force abort (runtime should not be restarted). + // NOTE: the above Abort request makes it to the dispatcher before the first + // iteration of the dispatch loop, therefore the request is handled before + // the dispatcher is stuck in the recv loop. Here it is ensured that the + // runtime dispatcher is already running and waiting on requests. + err = r.Abort(context.Background(), false) + require.NoError(err, "Abort(force=false)") + + // There should be no stop event. + select { + case ev := <-evCh: + require.Nil(ev.Stopped, "unexpected stop event") + case <-time.After(recvAbortTimeout): + } } diff --git a/runtime/src/dispatcher.rs b/runtime/src/dispatcher.rs index 34cde839bcc..1391034b218 100644 --- a/runtime/src/dispatcher.rs +++ b/runtime/src/dispatcher.rs @@ -155,8 +155,11 @@ impl Dispatcher { /// Signals to dispatcher that it should abort and waits for the abort to /// complete. - pub fn abort_and_wait(&self) -> Result<()> { + pub fn abort_and_wait(&self, ctx: Context, id: u64, req: Body) -> Result<()> { self.abort_batch.store(true, Ordering::SeqCst); + // Queue the request to break the dispatch loop in case nothing is + // being processed at the moment. + self.queue_request(ctx, id, req)?; // Wait for abort. self.abort_rx.recv().map_err(|error| anyhow!("{}", error)) } @@ -286,6 +289,11 @@ impl Dispatcher { signed_policy_raw, ); } + Ok((_ctx, _id, Body::RuntimeAbortRequest {})) => { + // We handle the RuntimeAbortRequest here so that we break + // the recv loop and re-check abort flag. + info!(self.logger, "Received abort request"); + } Ok(_) => { error!(self.logger, "Unsupported request type"); break 'dispatch; diff --git a/runtime/src/protocol.rs b/runtime/src/protocol.rs index f4a0e206bf5..485c7d39a9b 100644 --- a/runtime/src/protocol.rs +++ b/runtime/src/protocol.rs @@ -268,10 +268,10 @@ impl Protocol { info!(self.logger, "Received worker shutdown request"); Err(ProtocolError::MethodNotSupported.into()) } - Body::RuntimeAbortRequest {} => { + req @ Body::RuntimeAbortRequest {} => { info!(self.logger, "Received worker abort request"); self.can_handle_runtime_requests()?; - self.dispatcher.abort_and_wait()?; + self.dispatcher.abort_and_wait(ctx, id, req)?; info!(self.logger, "Handled worker abort request"); Ok(Some(Body::RuntimeAbortResponse {})) }