Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.10] gh-116773: Fix overlapped memory corruption crash (GH-116774) #117079

Merged
merged 2 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions Lib/asyncio/windows_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,13 +323,13 @@ def run_forever(self):
if self._self_reading_future is not None:
ov = self._self_reading_future._ov
self._self_reading_future.cancel()
# self_reading_future was just cancelled so if it hasn't been
# finished yet, it never will be (it's possible that it has
# already finished and its callback is waiting in the queue,
# where it could still happen if the event loop is restarted).
# Unregister it otherwise IocpProactor.close will wait for it
# forever
if ov is not None:
# self_reading_future always uses IOCP, so even though it's
# been cancelled, we need to make sure that the IOCP message
# is received so that the kernel is not holding on to the
# memory, possibly causing memory corruption later. Only
# unregister it if IO is complete in all respects. Otherwise
# we need another _poll() later to complete the IO.
if ov is not None and not ov.pending:
self._proactor._unregister(ov)
self._self_reading_future = None

Expand Down
50 changes: 45 additions & 5 deletions Lib/test/test_asyncio/test_windows_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,23 @@ def data_received(self, data):
self.trans.close()


class ProactorLoopCtrlC(test_utils.TestCase):
class WindowsEventsTestCase(test_utils.TestCase):
def _unraisablehook(self, unraisable):
# Storing unraisable.object can resurrect an object which is being
# finalized. Storing unraisable.exc_value creates a reference cycle.
self._unraisable = unraisable
print(unraisable)

def setUp(self):
self._prev_unraisablehook = sys.unraisablehook
self._unraisable = None
sys.unraisablehook = self._unraisablehook

def tearDown(self):
sys.unraisablehook = self._prev_unraisablehook
self.assertIsNone(self._unraisable)

class ProactorLoopCtrlC(WindowsEventsTestCase):

def test_ctrl_c(self):

Expand All @@ -58,7 +74,7 @@ def SIGINT_after_delay():
thread.join()


class ProactorMultithreading(test_utils.TestCase):
class ProactorMultithreading(WindowsEventsTestCase):
def test_run_from_nonmain_thread(self):
finished = False

Expand All @@ -79,7 +95,7 @@ def func():
self.assertTrue(finished)


class ProactorTests(test_utils.TestCase):
class ProactorTests(WindowsEventsTestCase):

def setUp(self):
super().setUp()
Expand Down Expand Up @@ -290,8 +306,32 @@ async def probe():

return "done"


class WinPolicyTests(test_utils.TestCase):
def test_loop_restart(self):
# We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX>
# still has pending operation at deallocation, the process may crash" error
stop = threading.Event()
def threadMain():
while not stop.is_set():
self.loop.call_soon_threadsafe(lambda: None)
time.sleep(0.01)
thr = threading.Thread(target=threadMain)

# In 10 60-second runs of this test prior to the fix:
# time in seconds until failure: (none), 15.0, 6.4, (none), 7.6, 8.3, 1.7, 22.2, 23.5, 8.3
# 10 seconds had a 50% failure rate but longer would be more costly
end_time = time.time() + 10 # Run for 10 seconds
self.loop.call_soon(thr.start)
while not self._unraisable: # Stop if we got an unraisable exc
self.loop.stop()
self.loop.run_forever()
if time.time() >= end_time:
break

stop.set()
thr.join()


class WinPolicyTests(WindowsEventsTestCase):

def test_selector_win_policy(self):
async def main():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix instances of ``<_overlapped.Overlapped object at 0xXXX> still has pending operation at deallocation, the process may crash``.
18 changes: 18 additions & 0 deletions Modules/overlapped.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,24 @@ Overlapped_dealloc(OverlappedObject *self)
if (!HasOverlappedIoCompleted(&self->overlapped) &&
self->type != TYPE_NOT_STARTED)
{
// NOTE: We should not get here, if we do then something is wrong in
// the IocpProactor or ProactorEventLoop. Since everything uses IOCP if
// the overlapped IO hasn't completed yet then we should not be
// deallocating!
//
// The problem is likely that this OverlappedObject was removed from
// the IocpProactor._cache before it was complete. The _cache holds a
// reference while IO is pending so that it does not get deallocated
// while the kernel has retained the OVERLAPPED structure.
//
// CancelIoEx (likely called from self.cancel()) may have successfully
// completed, but the OVERLAPPED is still in use until either
// HasOverlappedIoCompleted() is true or GetQueuedCompletionStatus has
// returned this OVERLAPPED object.
//
// NOTE: Waiting when IOCP is in use can hang indefinitely, but this
// CancelIoEx is superfluous in that self.cancel() was already called,
// so I've only ever seen this return FALSE with GLE=ERROR_NOT_FOUND
if (Py_CancelIoEx && Py_CancelIoEx(self->handle, &self->overlapped))
wait = TRUE;

Expand Down
Loading