
Commit

[Bugfix] Fix divide by zero when serving Mamba models (vllm-project#9617)

Signed-off-by: Tyler Michael Smith <[email protected]>
Signed-off-by: NickLucche <[email protected]>
tlrmchlsmth authored and NickLucche committed Oct 31, 2024
1 parent a426ab9 commit 0bdbde1
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions vllm/engine/llm_engine.py
@@ -1612,15 +1612,15 @@ def _get_stats(self,
         # KV Cache Usage in %
         num_total_gpu = self.cache_config.num_gpu_blocks
         gpu_cache_usage_sys = 0.
-        if num_total_gpu is not None:
+        if num_total_gpu:  # Guard against both None and 0
             num_free_gpu = sum(
                 scheduler.block_manager.get_num_free_gpu_blocks()
                 for scheduler in self.scheduler)
             gpu_cache_usage_sys = 1.0 - (num_free_gpu / num_total_gpu)

         num_total_cpu = self.cache_config.num_cpu_blocks
         cpu_cache_usage_sys = 0.
-        if num_total_cpu is not None and num_total_cpu > 0:
+        if num_total_cpu:  # Guard against both None and 0
             num_free_cpu = sum(
                 scheduler.block_manager.get_num_free_cpu_blocks()
                 for scheduler in self.scheduler)
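Both hunks apply the same pattern: replacing the explicit `is not None` check with a truthiness check covers the None case (cache not yet profiled) and the zero case, which arises for Mamba-style models that keep recurrent state rather than a paged KV cache and so report zero cache blocks. The following is a minimal standalone sketch of the guarded computation, not vLLM code; the helper name and values are illustrative only.

    from typing import Optional

    # Minimal sketch (not vLLM code): a truthy check covers both None and 0,
    # so the division is never reached when no cache blocks exist.
    def cache_usage(num_free: int, num_total: Optional[int]) -> float:
        """Fraction of cache blocks in use; 0.0 when there is no cache."""
        if num_total:  # guards against both None and 0
            return 1.0 - (num_free / num_total)
        return 0.0

    assert cache_usage(0, None) == 0.0    # cache not yet profiled
    assert cache_usage(0, 0) == 0.0       # Mamba: zero blocks, no ZeroDivisionError
    assert cache_usage(25, 100) == 0.75   # 75% of blocks in use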
