From 4dcf52fbb5640d24f43950feb3425408f044771d Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Tue, 23 Jul 2024 21:48:42 +0800 Subject: [PATCH 1/4] [bugfix] do_log_stats() must call the Prometheus logger first. --- vllm/engine/llm_engine.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index eabe3b23a9d58..acd6de171eec9 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -949,8 +949,10 @@ def do_log_stats( model_output: Optional[List[SamplerOutput]] = None) -> None: """Forced log when no requests active.""" if self.log_stats: - for logger in self.stat_loggers.values(): - logger.log(self._get_stats(scheduler_outputs, model_output)) + logger = self.stat_loggers["prometheus"] + logger.log(self._get_stats(scheduler_outputs, model_output)) + logger = self.stat_loggers["logging"] + logger.log(self._get_stats(scheduler_outputs, model_output)) def _get_stats( self, From 1e6c512c4d42dfb1eab44b59f6c8a52c0c1b2010 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Wed, 24 Jul 2024 01:12:22 +0800 Subject: [PATCH 2/4] Call _get_stats() only once. 
--- vllm/engine/llm_engine.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index acd6de171eec9..f981f096e99ca 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -948,11 +948,9 @@ def do_log_stats( scheduler_outputs: Optional[SchedulerOutputs] = None, model_output: Optional[List[SamplerOutput]] = None) -> None: """Forced log when no requests active.""" - if self.log_stats: - logger = self.stat_loggers["prometheus"] - logger.log(self._get_stats(scheduler_outputs, model_output)) - logger = self.stat_loggers["logging"] - logger.log(self._get_stats(scheduler_outputs, model_output)) + stats = self._get_stats(scheduler_outputs, model_output) + for logger in self.stat_loggers.values(): + logger.log(stats) def _get_stats( self, From 995d3b59df4b252be6b129f4f02f2209f553a663 Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Tue, 23 Jul 2024 10:15:20 -0700 Subject: [PATCH 3/4] Update vllm/engine/llm_engine.py --- vllm/engine/llm_engine.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f981f096e99ca..48d5305892219 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -948,9 +948,10 @@ def do_log_stats( scheduler_outputs: Optional[SchedulerOutputs] = None, model_output: Optional[List[SamplerOutput]] = None) -> None: """Forced log when no requests active.""" - stats = self._get_stats(scheduler_outputs, model_output) - for logger in self.stat_loggers.values(): - logger.log(stats) + if self.log_stats: + stats = self._get_stats(scheduler_outputs, model_output) + for logger in self.stat_loggers.values(): + logger.log(stats) def _get_stats( self, From 7c9b78d7e3b5c4b6f6e5dda56b00f79b52b87188 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Wed, 24 Jul 2024 19:58:00 +0800 Subject: [PATCH 4/4] typo fix. 
--- vllm/spec_decode/spec_decode_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py index 8cf0aa5b8981a..98960b88f719f 100644 --- a/vllm/spec_decode/spec_decode_worker.py +++ b/vllm/spec_decode/spec_decode_worker.py @@ -484,7 +484,7 @@ def _run_non_driver_rank(self) -> bool: for both speculation cases (num_lookahead_slots>0) and non-speculation cases (e.g. prefill). - Returns True iff there are remaining sequences to process. + Returns True if there are remaining sequences to process. """ assert self.rank != self._driver_rank