fix logging msg for block manager #3701

Merged (3 commits) on Mar 28, 2024
4 changes: 3 additions & 1 deletion vllm/attention/selector.py
@@ -41,6 +41,8 @@ def _can_use_flash_attn(dtype: torch.dtype) -> bool:
     try:
         import flash_attn  # noqa: F401
     except ImportError:
-        logger.info("flash_attn is not found.")
+        logger.info(
+            "Cannot use FlashAttention because the package is not found. "
+            "Please install it for better performance.")
Collaborator Author commented:

@WoosukKwon is this true?

         return False
     return True
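Note for readers skimming the diff: below is a minimal, self-contained sketch of the probe-and-fallback pattern this hunk adjusts. The function name, the omitted dtype argument, and the logging setup are illustrative, not vLLM's actual API.

import logging

logger = logging.getLogger(__name__)


def can_use_flash_attn() -> bool:
    # Simplified stand-in for _can_use_flash_attn: probe for the optional
    # flash_attn package and fall back gracefully when it is missing.
    try:
        import flash_attn  # noqa: F401
    except ImportError:
        # The new message says what cannot be used and how to fix it,
        # rather than only stating that the package was not found.
        logger.info(
            "Cannot use FlashAttention because the package is not found. "
            "Please install it for better performance.")
        return False
    return True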
3 changes: 1 addition & 2 deletions vllm/core/block_manager_v1.py
@@ -230,13 +230,12 @@ def __init__(
         self.watermark_blocks = int(watermark * num_gpu_blocks)

         if self.enable_caching:
-            logger.info("enable automatic prefix caching")
+            logger.info("Automatic prefix caching is enabled.")
             self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size,
                                                       num_gpu_blocks)
             self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size,
                                                       num_cpu_blocks)
         else:
-            logger.info("disable automatic prefix caching")
             self.gpu_allocator = UncachedBlockAllocator(
                 Device.GPU, block_size, num_gpu_blocks)
             self.cpu_allocator = UncachedBlockAllocator(
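Note: a small runnable sketch of the logging behavior after this hunk, using stub names (Device, StubAllocator, build_allocators) in place of vLLM's real allocator classes. Only the caching-enabled path logs; the disabled path is now silent.

import enum
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class Device(enum.Enum):
    GPU = enum.auto()
    CPU = enum.auto()


class StubAllocator:
    # Hypothetical stand-in for CachedBlockAllocator / UncachedBlockAllocator.
    def __init__(self, device: Device, block_size: int, num_blocks: int):
        self.device = device
        self.block_size = block_size
        self.num_blocks = num_blocks


def build_allocators(enable_caching: bool, block_size: int,
                     num_gpu_blocks: int, num_cpu_blocks: int):
    # After this change, only the enabled case emits a log line;
    # the disabled case no longer logs "disable automatic prefix caching".
    if enable_caching:
        logger.info("Automatic prefix caching is enabled.")
    gpu = StubAllocator(Device.GPU, block_size, num_gpu_blocks)
    cpu = StubAllocator(Device.CPU, block_size, num_cpu_blocks)
    return gpu, cpu


build_allocators(enable_caching=True, block_size=16,
                 num_gpu_blocks=1024, num_cpu_blocks=256)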
2 changes: 1 addition & 1 deletion vllm/model_executor/parallel_utils/pynccl_utils.py
@@ -10,7 +10,6 @@
 try:
     from vllm.model_executor.parallel_utils.pynccl import (NCCLCommunicator,
                                                             ncclGetVersion)
-    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
 except Exception as e:
     # in non-NVIDIA environments, we can't import the nccl module
     # e.g. when running on machines with AMD GPUs
@@ -39,6 +38,7 @@ def set_pynccl_stream(stream: torch.cuda.Stream):
 def init_process_group(world_size: int, rank: int, init_method: str) -> None:
     assert not is_initialized()
     global comm
+    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
     comm = NCCLCommunicator(init_method=init_method,
                             world_size=world_size,
                             rank=rank)
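Note: the net effect of these two hunks is to defer the "vLLM is using nccl==..." log from module import time to init_process_group. A runnable sketch under that reading; nccl_get_version here is a stub, not the real binding.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def nccl_get_version() -> str:
    # Stub for ncclGetVersion(); the real call needs the NCCL bindings,
    # which cannot be imported in non-NVIDIA environments.
    return "2.18.1"


def init_process_group(world_size: int, rank: int, init_method: str) -> None:
    # Logging here rather than at import time means the version line is
    # emitted only when NCCL is actually being initialized for use.
    logger.info(f"vLLM is using nccl=={nccl_get_version()}")
    # ... the real function then constructs an NCCLCommunicator with
    # init_method, world_size, and rank ...


init_process_group(world_size=1, rank=0, init_method="tcp://127.0.0.1:29500")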