From da99ffea8ff10a33d17f0a7418cb086ba6b8fff1 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 24 Mar 2024 23:02:03 +0000 Subject: [PATCH 1/3] Revert "[Bugfix] use SoftLockFile instead of LockFile (#3578)" This reverts commit 743a0b74021b466088924d1a1228031bdedba896. --- vllm/model_executor/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index 0d7ee269922f4..24d78db8d2637 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -34,7 +34,7 @@ def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): lock_dir = cache_dir if cache_dir is not None else _vllm_filelocks_path os.makedirs(os.path.dirname(lock_dir), exist_ok=True) lock_file_name = model_name_or_path.replace("/", "-") + ".lock" - lock = filelock.SoftFileLock(os.path.join(lock_dir, lock_file_name)) + lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name)) return lock From a1aba04a5613066dc0956955ad363b0880c8fc76 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 24 Mar 2024 18:24:20 -0700 Subject: [PATCH 2/3] avoid conflict with existing user lock; use mode 666 and system level temp directory --- vllm/model_executor/weight_utils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index 24d78db8d2637..e4da1306f09c8 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -1,6 +1,7 @@ """Utilities for downloading and initializing model weights.""" import filelock import glob +import hashlib import fnmatch import json import os @@ -20,8 +21,12 @@ logger = init_logger(__name__) -_xdg_cache_home = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) -_vllm_filelocks_path = os.path.join(_xdg_cache_home, 'vllm/locks/') +# use system-level temp directory for file locks, so that multiple users +# can share the same lock without error. +# lock files in the temp directory will be automatically deleted when the +# system reboots, so users will not complain about annoying lock files +temp_dir = os.environ.get('TMPDIR') or os.environ.get( + 'TEMP') or os.environ.get('TMP') class Disabledtqdm(tqdm): @@ -31,10 +36,15 @@ def __init__(self, *args, **kwargs): def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): - lock_dir = cache_dir if cache_dir is not None else _vllm_filelocks_path + lock_dir = cache_dir or temp_dir os.makedirs(os.path.dirname(lock_dir), exist_ok=True) - lock_file_name = model_name_or_path.replace("/", "-") + ".lock" - lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name)) + model_name = model_name_or_path.replace("/", "-") + hash_name = hashlib.sha256(model_name.encode()).hexdigest() + # add hash to avoid conflict with old users' lock files + lock_file_name = hash_name + model_name + ".lock" + # mode 0o666 is required for the filelock to be shared across users + lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), + mode=0o666) return lock From f2c122c3c24e6bbed9f64d8c34b43a435ab0906d Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 24 Mar 2024 18:25:07 -0700 Subject: [PATCH 3/3] add /tmp as default --- vllm/model_executor/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index e4da1306f09c8..7eebe151754ba 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -26,7 +26,7 @@ # lock files in the temp directory will be automatically deleted when the # system reboots, so users will not complain about annoying lock files temp_dir = os.environ.get('TMPDIR') or os.environ.get( - 'TEMP') or os.environ.get('TMP') + 'TEMP') or os.environ.get('TMP') or "/tmp/" class Disabledtqdm(tqdm):