From 790f97e802a729f71098c12f6661468b2e7f11b0 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 30 Aug 2024 13:05:14 -0700 Subject: [PATCH 1/3] Move some metrics to test_custom_sampling_params --- ci/L0_backend_vllm/metrics_test/test.sh | 175 +++++------------- .../metrics_test/vllm_metrics_test.py | 41 ++-- 2 files changed, 73 insertions(+), 143 deletions(-) diff --git a/ci/L0_backend_vllm/metrics_test/test.sh b/ci/L0_backend_vllm/metrics_test/test.sh index 0a8a96d6..828557ee 100755 --- a/ci/L0_backend_vllm/metrics_test/test.sh +++ b/ci/L0_backend_vllm/metrics_test/test.sh @@ -49,36 +49,42 @@ function copy_model_repository { sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.4/' models/vllm_opt/1/model.json } -RET=0 - -# Test disabling vLLM metrics reporting without parameter "REPORT_CUSTOM_METRICS" in config.pbtxt -copy_model_repository -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 -fi +run_test() { + local TEST_CASE=$1 + + run_server + if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 + fi -set +e -python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1 + set +e + python3 $CLIENT_PY $TEST_CASE -v > $CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***" - RET=1 -else - check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS if [ $? -ne 0 ]; then cat $CLIENT_LOG - echo -e "\n***\n*** Test Result Verification FAILED.\n***" + echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***" RET=1 + else + check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS + if [ $? -ne 0 ]; then + cat $CLIENT_LOG + echo -e "\n***\n*** Test Result Verification FAILED.\n***" + RET=1 + fi fi -fi -set -e + set -e -kill $SERVER_PID -wait $SERVER_PID + kill $SERVER_PID + wait $SERVER_PID +} + +RET=0 + +# Test disabling vLLM metrics reporting without parameter "REPORT_CUSTOM_METRICS" in config.pbtxt +copy_model_repository +run_test VLLMTritonMetricsTest.test_vllm_metrics_disabled # Test disabling vLLM metrics reporting with parameter "REPORT_CUSTOM_METRICS" set to "no" in config.pbtxt copy_model_repository @@ -90,33 +96,7 @@ parameters: { } } " >> models/vllm_opt/config.pbtxt - -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 -fi - -set +e -python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1 - -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***" - RET=1 -else - check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS - if [ $? 
-ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
+run_test VLLMTritonMetricsTest.test_vllm_metrics_disabled
 
 # Test vLLM metrics reporting with parameter "REPORT_CUSTOM_METRICS" set to "yes" in config.pbtxt
 copy_model_repository
@@ -129,33 +109,22 @@ parameters: {
   }
 }
 " >> models/vllm_opt/config.pbtxt
+run_test VLLMTritonMetricsTest.test_vllm_metrics
 
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
+# Test vLLM metrics custom sampling parameters
+# Custom sampling parameters may result in different vLLM output depending
+# on the platform. Therefore, these metrics are tested separately.
+copy_model_repository
+cp ${SAMPLE_MODELS_REPO}/vllm_model/config.pbtxt models/vllm_opt
+echo -e "
+parameters: {
+  key: \"REPORT_CUSTOM_METRICS\"
+  value: {
+    string_value:\"yes\"
+  }
+}
+" >> models/vllm_opt/config.pbtxt
+run_test VLLMTritonMetricsTest.test_custom_sampling_params
 
 # Test enabling vLLM metrics reporting in config.pbtxt but disabling in model.json
 copy_model_repository
@@ -169,33 +138,7 @@ parameters: {
   }
 }
 " >> models/vllm_opt/config.pbtxt
-
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
+run_test VLLMTritonMetricsTest.test_vllm_metrics_disabled
 
 # Test enabling vLLM metrics reporting in config.pbtxt while disabling in server option
 copy_model_repository
@@ -208,32 +151,8 @@ parameters: {
   }
 }
 " >> models/vllm_opt/config.pbtxt
 SERVER_ARGS="${SERVER_ARGS} --allow-metrics=false"
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_refused -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_refused FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? 
-ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Result Verification FAILED.\n***" - RET=1 - fi -fi -set -e +run_test VLLMTritonMetricsTest.test_vllm_metrics_refused -kill $SERVER_PID -wait $SERVER_PID rm -rf "./models" "temp.json" if [ $RET -eq 1 ]; then diff --git a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py index 86a299ec..598a5e0c 100644 --- a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py +++ b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py @@ -112,20 +112,10 @@ def vllm_infer( self.triton_client.stop_stream() def test_vllm_metrics(self): - # Adding sampling parameters for testing metrics. - # Definitions can be found here https://docs.vllm.ai/en/latest/dev/sampling_params.html - n, best_of = 2, 4 - custom_sampling_parameters = self.sampling_parameters.copy() - # Changing "temperature" because "best_of" must be 1 when using greedy - # sampling, i.e. "temperature": "0". - custom_sampling_parameters.update( - {"n": str(n), "best_of": str(best_of), "temperature": "1"} - ) - # Test vLLM metrics self.vllm_infer( prompts=self.prompts, - sampling_parameters=custom_sampling_parameters, + sampling_parameters=self.sampling_parameters, model_name=self.vllm_model_name, ) metrics_dict = self.parse_vllm_metrics() @@ -134,7 +124,7 @@ def test_vllm_metrics(self): # vllm:prompt_tokens_total self.assertEqual(metrics_dict["vllm:prompt_tokens_total"], 18) # vllm:generation_tokens_total - self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 188) + self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 48) # vllm:time_to_first_token_seconds self.assertEqual( metrics_dict["vllm:time_to_first_token_seconds_count"], total_prompts @@ -166,13 +156,34 @@ def test_vllm_metrics(self): # vllm:request_generation_tokens self.assertEqual( metrics_dict["vllm:request_generation_tokens_count"], - best_of * total_prompts, + total_prompts, ) - self.assertEqual(metrics_dict["vllm:request_generation_tokens_sum"], 188) + self.assertEqual(metrics_dict["vllm:request_generation_tokens_sum"], 48) self.assertEqual( metrics_dict["vllm:request_generation_tokens_bucket"], - best_of * total_prompts, + total_prompts, + ) + + def test_custom_sampling_params(self): + # Adding sampling parameters for testing metrics. + # Definitions can be found here https://docs.vllm.ai/en/latest/dev/sampling_params.html + n, best_of = 2, 4 + custom_sampling_parameters = self.sampling_parameters.copy() + # Changing "temperature" because "best_of" must be 1 when using greedy + # sampling, i.e. "temperature": "0". 
+ custom_sampling_parameters.update( + {"n": str(n), "best_of": str(best_of), "temperature": "1"} + ) + + # Test vLLM metrics + self.vllm_infer( + prompts=self.prompts, + sampling_parameters=custom_sampling_parameters, + model_name=self.vllm_model_name, ) + metrics_dict = self.parse_vllm_metrics() + total_prompts = len(self.prompts) + # vllm:request_params_best_of self.assertEqual( metrics_dict["vllm:request_params_best_of_count"], total_prompts From 47488e31a0cc5ca92ecd13096726a4e720a782a1 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 3 Sep 2024 11:54:22 -0700 Subject: [PATCH 2/3] Fix error log --- ci/L0_backend_vllm/metrics_test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/L0_backend_vllm/metrics_test/test.sh b/ci/L0_backend_vllm/metrics_test/test.sh index 828557ee..fd976d4a 100755 --- a/ci/L0_backend_vllm/metrics_test/test.sh +++ b/ci/L0_backend_vllm/metrics_test/test.sh @@ -64,7 +64,7 @@ run_test() { if [ $? -ne 0 ]; then cat $CLIENT_LOG - echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***" + echo -e "\n***\n*** Running $CLIENT_PY $TEST_CASE FAILED. \n***" RET=1 else check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS From d307aa2ac4fb451e5e8e3738cbfdbd848097076f Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 3 Sep 2024 18:11:02 -0700 Subject: [PATCH 3/3] Comments for magic numbers --- ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py index 598a5e0c..6bef1746 100644 --- a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py +++ b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py @@ -122,8 +122,14 @@ def test_vllm_metrics(self): total_prompts = len(self.prompts) # vllm:prompt_tokens_total + # (2, 133, 144, 2702, 3477, 16) + # (2, 133, 812, 9, 1470, 16) + # (2, 133, 499, 9, 4687, 16) self.assertEqual(metrics_dict["vllm:prompt_tokens_total"], 18) # vllm:generation_tokens_total + # (5, 65, 14, 16, 144, 533, 7, 28, 848, 30, 10, 512, 4, 50118, 100, 437) + # (5, 812, 9, 5, 1515, 3497, 4, 50118, 50118, 133, 812, 9, 1470, 16, 5, 812) + # (11, 5, 1420, 9, 5, 82, 4, 50118, 50118, 133, 499, 9, 4687, 16, 11, 5) self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 48) # vllm:time_to_first_token_seconds self.assertEqual(