Skip to content

Commit

Permalink
Merge pull request #5875 from kevin-bates/kernel-list-race-condition
Browse files Browse the repository at this point in the history
Fix race condition with async kernel management
  • Loading branch information
Zsailer authored Dec 23, 2020
2 parents 24bf3a5 + ed0c640 commit 6345008
Showing 1 changed file with 17 additions and 6 deletions.
23 changes: 17 additions & 6 deletions notebook/services/kernels/kernelmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,19 @@ def shutdown_kernel(self, kernel_id, now=False, restart=False):
kernel._activity_stream.close()
kernel._activity_stream = None
self.stop_buffering(kernel_id)
self._kernel_connections.pop(kernel_id, None)

# Decrease the metric of number of kernels
# running for the relevant kernel type by 1
KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
type=self._kernels[kernel_id].kernel_name
).dec()

return self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
# Unlike its async sibling method in AsyncMappingKernelManager, removing the kernel_id
# from the connections dictionary isn't as problematic before the shutdown since the
# method is synchronous. However, we'll keep the relative call orders the same from
# a maintenance perspective.
self._kernel_connections.pop(kernel_id, None)

async def restart_kernel(self, kernel_id, now=False):
"""Restart a kernel by kernel_id"""
Expand Down Expand Up @@ -376,8 +380,11 @@ def list_kernels(self):
kernels = []
kernel_ids = self.pinned_superclass.list_kernel_ids(self)
for kernel_id in kernel_ids:
model = self.kernel_model(kernel_id)
kernels.append(model)
try:
model = self.kernel_model(kernel_id)
kernels.append(model)
except (web.HTTPError, KeyError):
pass # Probably due to a (now) non-existent kernel, continue building the list
return kernels

# override _check_kernel_id to raise 404 instead of KeyError
Expand Down Expand Up @@ -498,12 +505,16 @@ async def shutdown_kernel(self, kernel_id, now=False, restart=False):
kernel._activity_stream.close()
kernel._activity_stream = None
self.stop_buffering(kernel_id)
self._kernel_connections.pop(kernel_id, None)

# Decrease the metric of number of kernels
# running for the relevant kernel type by 1
KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
type=self._kernels[kernel_id].kernel_name
).dec()

return await self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
await self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
# Remove kernel_id from the connections dictionary only after kernel has been shutdown,
# otherwise a race condition can occur since the shutdown may take a while - allowing
# list/fetch kernel operations to access _kernel_connections for a non-existent key
# (kernel_id) while "awaiting" the result of the shutdown.
self._kernel_connections.pop(kernel_id, None)

0 comments on commit 6345008

Please sign in to comment.