
Fix response preprocessing bug (#646)
* Fix empty response bug

* Fix unused variable
nv-hwoo authored May 11, 2024
1 parent 8e5e6d1 commit 9499436
Showing 2 changed files with 46 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py
@@ -600,13 +600,13 @@ def _preprocess_response(

         # Remove responses without any content
         # These are only observed to happen at the start or end
-        while res_outputs[0] and self._is_openai_empty_response(
+        while res_outputs and self._is_openai_empty_response(
             res_outputs[0]["response"]
         ):
             res_timestamps.pop(0)
             res_outputs.pop(0)

-        while res_outputs[-1] and self._is_openai_empty_response(
+        while res_outputs and self._is_openai_empty_response(
             res_outputs[-1]["response"]
         ):
             res_timestamps.pop()
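Why the fix works: the old loop conditions index into res_outputs before checking that the list is non-empty. When every response in a request is empty, the pops eventually drain the list, and the next evaluation of res_outputs[0] (or res_outputs[-1]) raises IndexError. Checking the list's truthiness first terminates the loop cleanly instead. A minimal standalone sketch of the failure mode, with trim_empty and is_empty as hypothetical stand-ins for the parser's internals:

def trim_empty(res_timestamps, res_outputs, is_empty):
    # Old condition was `while res_outputs[0] and ...`: once the pops have
    # drained the list, indexing it raises IndexError. Checking the list
    # itself first (the fix) simply stops looping when nothing is left.
    while res_outputs and is_empty(res_outputs[0]["response"]):
        res_timestamps.pop(0)
        res_outputs.pop(0)
    while res_outputs and is_empty(res_outputs[-1]["response"]):
        res_timestamps.pop()
        res_outputs.pop()
    return res_timestamps, res_outputs

# All-empty input: the old condition would raise; the fix returns ([], []).
print(trim_empty([3, 5, 8], [{"response": ""} for _ in range(3)], lambda r: not r))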
44 changes: 44 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py
@@ -74,6 +74,9 @@ def write(self: Any, content: str) -> int:
             elif filename == "openai_profile_export.json":
                 tmp_file = StringIO(json.dumps(self.openai_profile_data))
                 return tmp_file
+            elif filename == "empty_profile_export.json":
+                tmp_file = StringIO(json.dumps(self.empty_profile_data))
+                return tmp_file
             elif filename == "profile_export.csv":
                 tmp_file = StringIO()
                 tmp_file.write = write.__get__(tmp_file)
@@ -401,6 +404,47 @@ def test_llm_metrics_get_base_name(self) -> None:
         with pytest.raises(KeyError):
             metrics.get_base_name("hello1234")

+    def test_empty_response(self, mock_read_write: pytest.MonkeyPatch) -> None:
+        """Check if it handles all empty responses."""
+        tokenizer = get_tokenizer(DEFAULT_TOKENIZER)
+
+        # Should not throw error
+        _ = LLMProfileDataParser(
+            filename=Path("empty_profile_export.json"),
+            tokenizer=tokenizer,
+        )
+
+    empty_profile_data = {
+        "service_kind": "openai",
+        "endpoint": "v1/chat/completions",
+        "experiments": [
+            {
+                "experiment": {
+                    "mode": "concurrency",
+                    "value": 10,
+                },
+                "requests": [
+                    {
+                        "timestamp": 1,
+                        "request_inputs": {
+                            "payload": '{"messages":[{"role":"user","content":"This is test"}],"model":"llama-2-7b","stream":true}',
+                        },
+                        "response_timestamps": [3, 5, 8],
+                        "response_outputs": [
+                            {
+                                "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n'
+                            },
+                            {
+                                "response": 'data: {"id":"abc","object":"chat.completion.chunk","created":123,"model":"llama-2-7b","choices":[{"index":0,"delta":{"content":""},"finish_reason":null}]}\n\n'
+                            },
+                            {"response": "data: [DONE]\n\n"},
+                        ],
+                    },
+                ],
+            },
+        ],
+    }
+
     openai_profile_data = {
         "service_kind": "openai",
         "endpoint": "v1/chat/completions",
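For reference, every chunk in empty_profile_data should be treated as contentless: the first carries only a role delta, the second an empty content string, and the third is the [DONE] sentinel, so the fixed loops must drain the whole list without error. A rough sketch of that classification, using simplified chunks and a hypothetical looks_empty helper rather than the library's actual _is_openai_empty_response:

import json

# Hedged illustration only: strip the SSE "data: " prefix, treat the
# [DONE] sentinel as empty, and otherwise check whether the first
# choice's delta carries a non-empty "content" string.
def looks_empty(response: str) -> bool:
    payload = response.strip().removeprefix("data: ")
    if payload == "[DONE]":
        return True
    delta = json.loads(payload)["choices"][0]["delta"]
    return not delta.get("content")

chunks = [
    'data: {"choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n',
    'data: {"choices":[{"index":0,"delta":{"content":""},"finish_reason":null}]}\n\n',
    "data: [DONE]\n\n",
]
print([looks_empty(c) for c in chunks])  # prints [True, True, True]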
