Skip to content

Commit

Permalink
[Core] Use os.sched_yield in ShmRingBuffer instead of time.sleep (vllm-project#9994)
Browse files Browse the repository at this point in the history

Signed-off-by: Tyler Michael Smith <[email protected]>
  • Loading branch information
tlrmchlsmth authored Nov 5, 2024
1 parent 8f0a9ca commit 04bbf38
Showing 1 changed file with 5 additions and 10 deletions.
15 changes: 5 additions & 10 deletions vllm/distributed/device_communicators/shm_broadcast.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pickle
import time
from contextlib import contextmanager
Expand All @@ -18,12 +19,6 @@

VLLM_RINGBUFFER_WARNING_INTERVAL = envs.VLLM_RINGBUFFER_WARNING_INTERVAL

# time to wait if the queue is full or empty
# if we sleep for too short, it will consume too much CPU
# if we sleep for too long, it will slow down the writer/reader
# 0.1 us is a good balance
RINGBUFFER_SLEEP_INTERVAL = 1e-7

logger = init_logger(__name__)


Expand Down Expand Up @@ -333,8 +328,8 @@ def acquire_write(self):
# if this block is not ready to write,
# we need to wait until it is read by all readers

# wait for a while
time.sleep(RINGBUFFER_SLEEP_INTERVAL)
# Release the processor to other threads
os.sched_yield()

# if we wait for a long time, we should warn the user
if (time.monotonic() - start_time >
Expand Down Expand Up @@ -387,8 +382,8 @@ def acquire_read(self):
# if this block is not ready,
# we need to wait until it is written

# wait for a while
time.sleep(RINGBUFFER_SLEEP_INTERVAL)
# Release the processor to other threads
os.sched_yield()

# if we wait for a long time, we should warn the user
if (time.monotonic() - start_time >
Expand Down

0 comments on commit 04bbf38

Please sign in to comment.