From 20478c4d3abcd0aa8a1d9ace9c76ea3a2e04cb5e Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 19 Mar 2024 14:34:15 -0700 Subject: [PATCH] Use lru_cache for some environment detection utils (#3508) --- vllm/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/utils.py b/vllm/utils.py index d4a8c962c3bfc..7c73062e809f3 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -11,7 +11,7 @@ import psutil import torch import asyncio -from functools import partial +from functools import partial, lru_cache from typing import ( Awaitable, Callable, @@ -120,6 +120,7 @@ def is_hip() -> bool: return torch.version.hip is not None +@lru_cache(maxsize=None) def is_neuron() -> bool: try: import transformers_neuronx @@ -128,6 +129,7 @@ def is_neuron() -> bool: return transformers_neuronx is not None +@lru_cache(maxsize=None) def get_max_shared_memory_bytes(gpu: int = 0) -> int: """Returns the maximum shared memory per thread block in bytes.""" # NOTE: This import statement should be executed lazily since @@ -151,6 +153,7 @@ def random_uuid() -> str: return str(uuid.uuid4().hex) +@lru_cache(maxsize=None) def in_wsl() -> bool: # Reference: https://github.com/microsoft/WSL/issues/4071 return "microsoft" in " ".join(uname()).lower() @@ -225,6 +228,7 @@ def set_cuda_visible_devices(device_ids: List[int]) -> None: os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, device_ids)) +@lru_cache(maxsize=None) def get_nvcc_cuda_version() -> Optional[Version]: cuda_home = os.environ.get('CUDA_HOME') if not cuda_home: