From 2913e30360b8501c6aadf40446054609387913da Mon Sep 17 00:00:00 2001 From: Kyle McGill Date: Tue, 26 Nov 2024 12:52:04 -0800 Subject: [PATCH 1/6] Moved shared memory tests to their own test script; Generated a hash map for the different subtests of the L0_backend_python test --- qa/L0_backend_python/test.sh | 260 +++----------------- qa/L0_backend_python/test_shared_memory.sh | 268 +++++++++++++++++++++ 2 files changed, 307 insertions(+), 221 deletions(-) create mode 100644 qa/L0_backend_python/test_shared_memory.sh diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 10dbdd75d3..d94404525d 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -177,235 +177,53 @@ fi pip3 install pytest requests virtualenv -prev_num_pages=`get_shm_pages` -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 -fi - -set +e -python3 -m pytest --junitxml=L0_backend_python.report.xml $CLIENT_PY >> $CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - RET=1 -fi -set -e - -kill_server - -current_num_pages=`get_shm_pages` -if [ $current_num_pages -ne $prev_num_pages ]; then - ls /dev/shm - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. -Shared memory pages before starting triton equals to $prev_num_pages -and shared memory pages after starting triton equals to $current_num_pages \n***" - RET=1 -fi - -prev_num_pages=`get_shm_pages` -# Triton non-graceful exit -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 -fi - -sleep 5 - -readarray -t triton_procs < <(pgrep --parent ${SERVER_PID}) - -set +e - -# Trigger non-graceful termination of Triton -kill -9 $SERVER_PID - -# Wait 10 seconds so that Python stub can detect non-graceful exit -sleep 10 - -for triton_proc in $triton_procs; do - kill -0 $triton_proc > /dev/null 2>&1 - if [ $? 
-eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Python backend non-graceful exit test failed \n***" - RET=1 - break - fi -done -set -e - -# -# Test KIND_GPU -# Disable env test for Jetson & Windows since GPU Tensors are not supported -if [ "$TEST_JETSON" == "0" ] && [[ ${TEST_WINDOWS} == 0 ]]; then - rm -rf models/ - mkdir -p models/add_sub_gpu/1/ - cp ../python_models/add_sub/model.py ./models/add_sub_gpu/1/ - cp ../python_models/add_sub_gpu/config.pbtxt ./models/add_sub_gpu/ - - prev_num_pages=`get_shm_pages` - run_server - if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 - fi - - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** KIND_GPU model test failed \n***" - RET=1 - fi - - kill_server - - current_num_pages=`get_shm_pages` - if [ $current_num_pages -ne $prev_num_pages ]; then - cat $CLIENT_LOG - ls /dev/shm - echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. - Shared memory pages before starting triton equals to $prev_num_pages - and shared memory pages after starting triton equals to $current_num_pages \n***" - exit 1 - fi -fi - -# Test Multi file models -rm -rf models/ -mkdir -p models/multi_file/1/ -cp ../python_models/multi_file/*.py ./models/multi_file/1/ -cp ../python_models/identity_fp32/config.pbtxt ./models/multi_file/ -(cd models/multi_file && \ - sed -i "s/^name:.*/name: \"multi_file\"/" config.pbtxt) - -prev_num_pages=`get_shm_pages` -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 -fi - -if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** multi-file model test failed \n***" - RET=1 -fi - -kill_server - -current_num_pages=`get_shm_pages` -if [ $current_num_pages -ne $prev_num_pages ]; then - cat $SERVER_LOG - ls /dev/shm - echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. 
-Shared memory pages before starting triton equals to $prev_num_pages -and shared memory pages after starting triton equals to $current_num_pages \n***" - exit 1 -fi - -# Test environment variable propagation -rm -rf models/ -mkdir -p models/model_env/1/ -cp ../python_models/model_env/model.py ./models/model_env/1/ -cp ../python_models/model_env/config.pbtxt ./models/model_env/ - -export MY_ENV="MY_ENV" -if [[ ${TEST_WINDOWS} == 1 ]]; then - # This will run in WSL, but Triton will run in windows, so environment - # variables meant for loaded models must be exported using WSLENV. - # The /w flag indicates the value should only be included when invoking - # Win32 from WSL. - export WSLENV=MY_ENV/w -fi - -prev_num_pages=`get_shm_pages` -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - echo -e "\n***\n*** Environment variable test failed \n***" - exit 1 -fi - -kill_server - -current_num_pages=`get_shm_pages` -if [ $current_num_pages -ne $prev_num_pages ]; then - cat $CLIENT_LOG - ls /dev/shm - echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. -Shared memory pages before starting triton equals to $prev_num_pages -and shared memory pages after starting triton equals to $current_num_pages \n***" - exit 1 +# Set up the tests to execute. +declare -A subtest_properties +subtest_properties["shared_memory"]="" +subtest_properties["lifecycle"]="" +subtest_properties["argument_validation"]="" +subtest_properties["logging"]="" +subtest_properties["custom_metrics"]="" + +# Add tests depending on which environment is being run. 
+# If not running in igpu mode add these +if [ "$TEST_JETSON" == "0" ]; then + subtest_properties["ensemble"]="" + subtest_properties["bls"]="" + subtest_properties["decoupled"]="" + subtest_properties["response_sender"]="" + subtest_properties["env"]="" fi -rm -fr ./models -mkdir -p models/identity_fp32/1/ -cp ../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py -cp ../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt - -shm_default_byte_size=$((1024*1024*4)) -SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=$shm_default_byte_size" - -run_server -if [ "$SERVER_PID" == "0" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Failed to start $SERVER\n***" - exit 1 +# If not running on windows add these +if [[ ${TEST_WINDOWS} == 0 ]]; then + subtest_properties["variants"]="" + subtest_properties["io"]="" + subtest_properties["python_based_backends"]="" + subtest_properties["async_execute"]="" + subtest_properties["model_control"]="" + subtest_properties["examples"]="" + subtest_properties["request_rescheduling"]="" +fi + +if [[ -n "${SUBTESTS}" ]]; then + ALL_SUBTESTS=$(echo "${!subtest_properties[@]}") + for subtest in $(echo "${!subtest_properties[@]}"); do + if [[ ! "${SUBTESTS}" =~ "${subtest}" ]]; then + unset "subtest_properties[${subtest}]" + fi + done fi -for shm_page in `ls /dev/shm/`; do - if [[ $shm_page != triton_python_backend_shm* ]]; then - continue - fi - page_size=`ls -l /dev/shm/$shm_page 2>&1 | awk '{print $5}'` - if [ $page_size -ne $shm_default_byte_size ]; then - echo -e "Shared memory region size is not equal to -$shm_default_byte_size for page $shm_page. Region size is -$page_size." 
- RET=1 - fi +echo "Executing the following subtests: " +for subtest in $(echo "${!subtest_properties[@]}"); do + echo " ${subtest}: ${subtest_properties[${subtest}]}" done -kill_server - -# Test model getting killed during initialization -rm -fr ./models -mkdir -p models/init_exit/1/ -cp ../python_models/init_exit/model.py ./models/init_exit/1/model.py -cp ../python_models/init_exit/config.pbtxt ./models/init_exit/config.pbtxt +exit 0 +bash -ex test_shared_memory.sh -ERROR_MESSAGE="Stub process 'init_exit_0_0' is not healthy." -prev_num_pages=`get_shm_pages` -run_server -if [ "$SERVER_PID" != "0" ]; then - echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG - RET=1 - kill_server -else - if grep "$ERROR_MESSAGE" $SERVER_LOG; then - echo -e "Found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG - else - echo $CLIENT_LOG - echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG - RET=1 - fi -fi - -current_num_pages=`get_shm_pages` -if [ $current_num_pages -ne $prev_num_pages ]; then - cat $SERVER_LOG - ls /dev/shm - echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. -Shared memory pages before starting triton equals to $prev_num_pages -and shared memory pages after starting triton equals to $current_num_pages \n***" - exit 1 -fi # Disable env test for Jetson since cloud storage repos are not supported # Disable ensemble, io and bls tests for Jetson since GPU Tensors are not supported diff --git a/qa/L0_backend_python/test_shared_memory.sh b/qa/L0_backend_python/test_shared_memory.sh new file mode 100644 index 0000000000..c6d5532177 --- /dev/null +++ b/qa/L0_backend_python/test_shared_memory.sh @@ -0,0 +1,268 @@ +#!/bin/bash +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +BASE_SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=1" +# Set the default byte size to 5MBs to avoid going out of shared memory. The +# environment that this job runs on has only 1GB of shared-memory available. 
+SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=5242880" + +CLIENT_PY=./python_test.py +CLIENT_LOG="./client.log" +TEST_RESULT_FILE='test_results.txt' +SERVER_LOG="./inference_server.log" +source ../common/util.sh +source ./common.sh + +prev_num_pages=`get_shm_pages` +run_server +if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 +fi + +set +e +python3 -m pytest --junitxml=L0_backend_python.report.xml $CLIENT_PY >> $CLIENT_LOG 2>&1 +if [ $? -ne 0 ]; then + cat $CLIENT_LOG + RET=1 +fi +set -e + +kill_server + +current_num_pages=`get_shm_pages` +if [ $current_num_pages -ne $prev_num_pages ]; then + ls /dev/shm + cat $CLIENT_LOG + echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. +Shared memory pages before starting triton equals to $prev_num_pages +and shared memory pages after starting triton equals to $current_num_pages \n***" + RET=1 +fi + +prev_num_pages=`get_shm_pages` +# Triton non-graceful exit +run_server +if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 +fi + +sleep 5 + +readarray -t triton_procs < <(pgrep --parent ${SERVER_PID}) + +set +e + +# Trigger non-graceful termination of Triton +kill -9 $SERVER_PID + +# Wait 10 seconds so that Python stub can detect non-graceful exit +sleep 10 + +for triton_proc in "${triton_procs[@]}"; do + kill -0 $triton_proc > /dev/null 2>&1 + if [ $?
-eq 0 ]; then + cat $CLIENT_LOG + echo -e "\n***\n*** Python backend non-graceful exit test failed \n***" + RET=1 + break + fi +done +set -e + +# +# Test KIND_GPU +# Disable env test for Jetson & Windows since GPU Tensors are not supported +if [ "$TEST_JETSON" == "0" ] && [[ ${TEST_WINDOWS} == 0 ]]; then + rm -rf models/ + mkdir -p models/add_sub_gpu/1/ + cp ../python_models/add_sub/model.py ./models/add_sub_gpu/1/ + cp ../python_models/add_sub_gpu/config.pbtxt ./models/add_sub_gpu/ + + prev_num_pages=`get_shm_pages` + run_server + if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 + fi + + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** KIND_GPU model test failed \n***" + RET=1 + fi + + kill_server + + current_num_pages=`get_shm_pages` + if [ $current_num_pages -ne $prev_num_pages ]; then + cat $CLIENT_LOG + ls /dev/shm + echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. + Shared memory pages before starting triton equals to $prev_num_pages + and shared memory pages after starting triton equals to $current_num_pages \n***" + exit 1 + fi +fi + +# Test Multi file models +rm -rf models/ +mkdir -p models/multi_file/1/ +cp ../python_models/multi_file/*.py ./models/multi_file/1/ +cp ../python_models/identity_fp32/config.pbtxt ./models/multi_file/ +(cd models/multi_file && \ + sed -i "s/^name:.*/name: \"multi_file\"/" config.pbtxt) + +prev_num_pages=`get_shm_pages` +run_server +if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 +fi + +if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** multi-file model test failed \n***" + RET=1 +fi + +kill_server + +current_num_pages=`get_shm_pages` +if [ $current_num_pages -ne $prev_num_pages ]; then + cat $SERVER_LOG + ls /dev/shm + echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. 
+Shared memory pages before starting triton equals to $prev_num_pages +and shared memory pages after starting triton equals to $current_num_pages \n***" + exit 1 +fi + +# Test environment variable propagation +rm -rf models/ +mkdir -p models/model_env/1/ +cp ../python_models/model_env/model.py ./models/model_env/1/ +cp ../python_models/model_env/config.pbtxt ./models/model_env/ + +export MY_ENV="MY_ENV" +if [[ ${TEST_WINDOWS} == 1 ]]; then + # This will run in WSL, but Triton will run in windows, so environment + # variables meant for loaded models must be exported using WSLENV. + # The /w flag indicates the value should only be included when invoking + # Win32 from WSL. + export WSLENV=MY_ENV/w +fi + +prev_num_pages=`get_shm_pages` +run_server +if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + echo -e "\n***\n*** Environment variable test failed \n***" + exit 1 +fi + +kill_server + +current_num_pages=`get_shm_pages` +if [ $current_num_pages -ne $prev_num_pages ]; then + cat $CLIENT_LOG + ls /dev/shm + echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. 
+Shared memory pages before starting triton equals to $prev_num_pages +and shared memory pages after starting triton equals to $current_num_pages \n***" + exit 1 +fi + +rm -fr ./models +mkdir -p models/identity_fp32/1/ +cp ../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py +cp ../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt + +shm_default_byte_size=$((1024*1024*4)) +SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=$shm_default_byte_size" + +run_server +if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 +fi + +for shm_page in `ls /dev/shm/`; do + if [[ $shm_page != triton_python_backend_shm* ]]; then + continue + fi + page_size=`ls -l /dev/shm/$shm_page 2>&1 | awk '{print $5}'` + if [ $page_size -ne $shm_default_byte_size ]; then + echo -e "Shared memory region size is not equal to +$shm_default_byte_size for page $shm_page. Region size is +$page_size." + RET=1 + fi +done + +kill_server + +# Test model getting killed during initialization +rm -fr ./models +mkdir -p models/init_exit/1/ +cp ../python_models/init_exit/model.py ./models/init_exit/1/model.py +cp ../python_models/init_exit/config.pbtxt ./models/init_exit/config.pbtxt + +ERROR_MESSAGE="Stub process 'init_exit_0_0' is not healthy." + +prev_num_pages=`get_shm_pages` +run_server +if [ "$SERVER_PID" != "0" ]; then + echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG + RET=1 + kill_server +else + if grep "$ERROR_MESSAGE" $SERVER_LOG; then + echo -e "Found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG + else + echo $CLIENT_LOG + echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG + RET=1 + fi +fi + +current_num_pages=`get_shm_pages` +if [ $current_num_pages -ne $prev_num_pages ]; then + cat $SERVER_LOG + ls /dev/shm + echo -e "\n***\n*** Test Failed. Shared memory pages where not cleaned properly. 
+Shared memory pages before starting triton equals to $prev_num_pages +and shared memory pages after starting triton equals to $current_num_pages \n***" + exit 1 +fi From df040e395ed4915cc47e112bb0b9e7f51e73c2db Mon Sep 17 00:00:00 2001 From: Kyle McGill Date: Tue, 26 Nov 2024 13:04:16 -0800 Subject: [PATCH 2/6] subtests make use of the subtest_properties hash map --- qa/L0_backend_python/test.sh | 102 ++++++++++++++--------------------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index d94404525d..c1e39ff8ac 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -186,7 +186,12 @@ subtest_properties["logging"]="" subtest_properties["custom_metrics"]="" # Add tests depending on which environment is being run. + # If not running in igpu mode add these +# Disable env test for Jetson since cloud storage repos are not supported +# Disable ensemble, io and bls tests for Jetson since GPU Tensors are not supported +# Disable variants test for Jetson since already built without GPU Tensor support +# Disable decoupled test because it uses GPU tensors if [ "$TEST_JETSON" == "0" ]; then subtest_properties["ensemble"]="" subtest_properties["bls"]="" @@ -196,6 +201,11 @@ if [ "$TEST_JETSON" == "0" ]; then fi # If not running on windows add these +# [DLIS-6093] Disable variants test for Windows since tests are not executed in docker container (cannot apt update/install) +# [DLIS-5970] Disable io tests for Windows since GPU Tensors are not supported +# [DLIS-6124] Disable restart test for Windows since it requires more investigation +# [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload +# [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients if [[ ${TEST_WINDOWS} == 0 ]]; then subtest_properties["variants"]="" subtest_properties["io"]="" @@ -206,14 +216,15 @@ if [[ ${TEST_WINDOWS} == 
0 ]]; then subtest_properties["request_rescheduling"]="" fi +ALL_SUBTESTS=$(echo "${!subtest_properties[@]}") if [[ -n "${SUBTESTS}" ]]; then - ALL_SUBTESTS=$(echo "${!subtest_properties[@]}") for subtest in $(echo "${!subtest_properties[@]}"); do if [[ ! "${SUBTESTS}" =~ "${subtest}" ]]; then unset "subtest_properties[${subtest}]" fi done fi +ALL_SUBTESTS=$(echo "${!subtest_properties[@]}") echo "Executing the following subtests: " for subtest in $(echo "${!subtest_properties[@]}"); do @@ -221,85 +232,50 @@ for subtest in $(echo "${!subtest_properties[@]}"); do done exit 0 -bash -ex test_shared_memory.sh - - -# Disable env test for Jetson since cloud storage repos are not supported -# Disable ensemble, io and bls tests for Jetson since GPU Tensors are not supported -# Disable variants test for Jetson since already built without GPU Tensor support -# Disable decoupled test because it uses GPU tensors -if [ "$TEST_JETSON" == "0" ]; then - SUBTESTS="ensemble bls decoupled response_sender" - # [DLIS-6093] Disable variants test for Windows since tests are not executed in docker container (cannot apt update/install) - # [DLIS-5970] Disable io tests for Windows since GPU Tensors are not supported - # [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload - if [[ ${TEST_WINDOWS} == 0 ]]; then - SUBTESTS+=" variants io python_based_backends async_execute" - fi - - for TEST in ${SUBTESTS}; do - # Run each subtest in a separate virtual environment to avoid conflicts - # between dependencies. - setup_virtualenv - - set +e - (cd ${TEST} && bash -ex test.sh) - EXIT_CODE=$? - if [ $EXIT_CODE -ne 0 ]; then - echo "Subtest ${TEST} FAILED" - RET=$EXIT_CODE - - # In bls test, it is allowed to fail with a strict memory leak of 480 bytes with exit code '123'. - # Propagate the exit code to make sure it's not overwritten by other tests. 
- if [[ ${TEST} == "bls" ]] && [[ $EXIT_CODE -ne 1 ]] ; then - BLS_RET=$RET - fi - fi - set -e - - deactivate_virtualenv - done +if [[ "${ALL_SUBTESTS}" =~ "shared_memory" ]]; then + bash -ex test_shared_memory.sh +fi - # [DLIS-5969]: Incorporate env test for windows - if [[ ${PYTHON_ENV_VERSION} = "12" ]] && [[ ${TEST_WINDOWS} == 0 ]]; then - # In 'env' test we use miniconda for dependency management. No need to run - # the test in a virtual environment. - set +e - (cd env && bash -ex test.sh) - if [ $? -ne 0 ]; then - echo "Subtest env FAILED" - RET=1 - fi - set -e +for TEST in ${ALL_SUBTESTS}; do + if [[ "${TEST}" == "env" ]]; then + continue fi -fi -SUBTESTS="lifecycle argument_validation logging custom_metrics" -# [DLIS-6124] Disable restart test for Windows since it requires more investigation -# [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload -# [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients -if [[ ${TEST_WINDOWS} == 0 ]]; then - # TODO: Reimplement restart on decoupled data pipeline and enable restart. - SUBTESTS+=" model_control examples request_rescheduling" -fi -for TEST in ${SUBTESTS}; do # Run each subtest in a separate virtual environment to avoid conflicts # between dependencies. setup_virtualenv set +e (cd ${TEST} && bash -ex test.sh) - - if [ $? -ne 0 ]; then + EXIT_CODE=$? + if [ $EXIT_CODE -ne 0 ]; then echo "Subtest ${TEST} FAILED" - RET=1 + RET=$EXIT_CODE + + # In bls test, it is allowed to fail with a strict memory leak of 480 bytes with exit code '123'. + # Propagate the exit code to make sure it's not overwritten by other tests. + if [[ ${TEST} == "bls" ]] && [[ $EXIT_CODE -ne 1 ]] ; then + BLS_RET=$RET + fi fi set -e deactivate_virtualenv done +if [[ ${ALL_SUBTESTS} =~ "env" ]]; then + # In 'env' test we use miniconda for dependency management. No need to run + # the test in a virtual environment.
+ set +e + (cd env && bash -ex test.sh) + if [ $? -ne 0 ]; then + echo "Subtest env FAILED" + RET=1 + fi + set -e +fi + if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" else From 20a949f6f030c60e9156e7269ead51635db2df83 Mon Sep 17 00:00:00 2001 From: Kyle McGill Date: Wed, 27 Nov 2024 09:40:37 -0800 Subject: [PATCH 3/6] Passing properties to the subtests --- qa/L0_backend_python/test.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index c1e39ff8ac..a6f0be0733 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -197,7 +197,7 @@ if [ "$TEST_JETSON" == "0" ]; then subtest_properties["bls"]="" subtest_properties["decoupled"]="" subtest_properties["response_sender"]="" - subtest_properties["env"]="" + subtest_properties["env"]="locale" fi # If not running on windows add these @@ -231,8 +231,6 @@ for subtest in $(echo "${!subtest_properties[@]}"); do echo " ${subtest}: ${subtest_properties[${subtest}]}" done -exit 0 - if [[ "${ALL_SUBTESTS}" =~ "shared_memory" ]]; then bash -ex test_shared_memory.sh fi @@ -247,7 +245,7 @@ for TEST in ${ALL_SUBTESTS}; do setup_virtualenv set +e - (cd ${TEST} && bash -ex test.sh) + (cd ${TEST} && PROPERTIES="${subtest_properties["${TEST}"]}" bash -ex test.sh) EXIT_CODE=$? if [ $EXIT_CODE -ne 0 ]; then echo "Subtest ${TEST} FAILED" @@ -264,11 +262,11 @@ for TEST in ${ALL_SUBTESTS}; do deactivate_virtualenv done -if [[ ${ALL_SUBTESTS} =~ "env" ]]; then +if [[ "${ALL_SUBTESTS}" =~ "env" ]]; then # In 'env' test we use miniconda for dependency management. No need to run # the test in a virtual environment. set +e - (cd env && bash -ex test.sh) + (cd env && PROPERTIES="${subtest_properties["env"]}" bash -ex test.sh) if [ $?
-ne 0 ]; then echo "Subtest env FAILED" RET=1 From 9c923b4230569fce6eef8776ecf5ecd2fcf29abd Mon Sep 17 00:00:00 2001 From: Kyle McGill Date: Wed, 27 Nov 2024 09:41:41 -0800 Subject: [PATCH 4/6] Subtests making use of the properties passed to them --- qa/L0_backend_python/env/test.sh | 402 +++++++++++++++------------ qa/L0_backend_python/logging/test.sh | 1 + 2 files changed, 226 insertions(+), 177 deletions(-) diff --git a/qa/L0_backend_python/env/test.sh b/qa/L0_backend_python/env/test.sh index 11a51378fb..8a760536f3 100755 --- a/qa/L0_backend_python/env/test.sh +++ b/qa/L0_backend_python/env/test.sh @@ -32,7 +32,194 @@ source ../../common/util.sh BASE_SERVER_ARGS="--model-repository=${MODELDIR}/env/models --log-verbose=1 --disable-auto-complete-config" PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG SERVER_ARGS=$BASE_SERVER_ARGS -SERVER_LOG="./env_server.log" + +# Available properties are: locale, extraction, aws +PROPERTIES=${PROPERTIES:=""} +echo "properties: ${PROPERTIES}" + +locale_test() { + local EXPECTED_VERSION_STRING=$1 + + SERVER_LOG="inference_server_locale_none.log" + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 + fi + + kill $SERVER_PID + wait $SERVER_PID + + set +e + grep "$EXPECTED_VERSION_STRING" $SERVER_LOG + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***" + RET=1 + fi + + # Test default (non set) locale in python stub processes + # NOTE: In certain pybind versions, the locale settings may not be propagated from parent to + # stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260. + export LC_ALL=INVALID + grep "Locale is (None, None)" $SERVER_LOG + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Default unset Locale was not found in Triton logs. 
\n***" + RET=1 + fi + set -e + + # Test locale set via environment variable in python stub processes + # NOTE: In certain pybind versions, the locale settings may not be propagated from parent to + # stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260. + export LC_ALL=C.UTF-8 + SERVER_LOG="inference_server_locale_utf8.log" + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 + fi + + kill $SERVER_PID + wait $SERVER_PID + + set +e + grep "Locale is ('en_US', 'UTF-8')" $SERVER_LOG + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Locale UTF-8 was not found in Triton logs. \n***" + RET=1 + fi + set -e +} + +extraction_test() { + ## Test re-extraction of environment. + SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --model-control-mode=explicit" + SERVER_LOG="inference_server_extraction.log" + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 + fi + + # The environment should be extracted + curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load + touch -m models/python_3_12/1/model.py + # The environment should not be re-extracted + curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load + touch -m models/python_3_12/python_3_12_environment.tar.gz + # The environment should be re-extracted + curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load + + kill $SERVER_PID + wait $SERVER_PID + + set +e + + PY312_ENV_EXTRACTION="Extracting Python execution env" + if [ `grep -c "${PY312_ENV_EXTRACTION}" ${SERVER_LOG}` != "2" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Python execution environment should be extracted exactly twice. \n***" + RET=1 + fi + set -e +} + +aws_test() { + # Test execution environments with S3 + # S3 credentials are necessary for this test. 
Pass via ENV variables + aws configure set default.region $AWS_DEFAULT_REGION && \ + aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \ + aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY + + # S3 bucket path (Point to bucket when testing cloud storage) + BUCKET_URL="s3://triton-bucket-${CI_JOB_ID}" + + # Cleanup and delete S3 test bucket if it already exists (due to test failure) + aws s3 rm $BUCKET_URL --recursive --include "*" && \ + aws s3 rb $BUCKET_URL || true + + # Make S3 test bucket + aws s3 mb "${BUCKET_URL}" + + # Remove Slash in BUCKET_URL + BUCKET_URL=${BUCKET_URL%/} + BUCKET_URL_SLASH="${BUCKET_URL}/" + + # Remove Python 3.7 model because it contains absolute paths and cannot be used + # with S3. + rm -rf models/python_3_7 + + # Test with the bucket url as model repository + aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*" + + SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1" + SERVER_LOG="inference_server_aws_bucket.log" + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + aws s3 rb "${BUCKET_URL}" --force || true + exit 1 + fi + + kill $SERVER_PID + wait $SERVER_PID + + set +e + grep "$PY36_VERSION_STRING" $SERVER_LOG + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** $PY36_VERSION_STRING was not found in Triton logs. 
\n***" + RET=1 + fi + set -e + + # Clean up bucket contents + aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*" + + # Test with EXECUTION_ENV_PATH outside the model directory + sed -i "s/TRITON_MODEL_DIRECTORY\/python_3_6_environment/TRITON_MODEL_DIRECTORY\/..\/python_3_6_environment/" models/python_3_6/config.pbtxt + mv models/python_3_6/python_3_6_environment.tar.gz models + sed -i "s/\$\$TRITON_MODEL_DIRECTORY\/python_3_12_environment/s3:\/\/triton-bucket-${CI_JOB_ID}\/python_3_12_environment/" models/python_3_12/config.pbtxt + mv models/python_3_12/python_3_12_environment.tar.gz models + + aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*" + + SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1" + SERVER_LOG="inference_server_aws_extraction.log" + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + aws s3 rb "${BUCKET_URL}" --force || true + exit 1 + fi + + kill $SERVER_PID + wait $SERVER_PID + + set +e + for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do + grep "$EXPECTED_VERSION_STRING" $SERVER_LOG + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***" + RET=1 + fi + done + set -e + + # Clean up bucket contents and delete bucket + aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*" + aws s3 rb "${BUCKET_URL}" + +} RET=0 @@ -62,6 +249,17 @@ cp ../../python_models/python_version/model.py ./models/python_3_7/1/ cp python_backend/builddir/triton_python_backend_stub ./models/python_3_7 conda deactivate +if [[ "${PROPERTIES}" =~ "locale" ]]; then + locale_test "$PY37_VERSION_STRING" +fi +if [[ "${PROPERTIES}" =~ "extraction" ]]; then + extraction_test +fi +if [[ "${PROPERTIES}" =~ "aws" ]]; then + aws_test +fi + + # Use python-3-7 without conda pack # Create a model with python 3.7 version and numpy 1.20.3 to distinguish from # previous test.
@@ -86,6 +284,16 @@ cp $path_to_conda_pack/lib/python3.7/site-packages/conda_pack/scripts/posix/acti cp python_backend/builddir/triton_python_backend_stub ./models/python_3_7_1 conda deactivate +if [[ "${PROPERTIES}" =~ "locale" ]]; then + locale_test $PY37_1_VERSION_STRING +fi +if [[ "${PROPERTIES}" =~ "extraction" ]]; then + extraction_test +fi +if [[ "${PROPERTIES}" =~ "aws" ]]; then + aws_test +fi + # Create a model with python 3.6 version # Tensorflow 2.1.0 only works with Python 3.4 - 3.7. Successful execution of # the Python model indicates that the environment has been setup correctly. @@ -109,6 +317,16 @@ cp ../../python_models/python_version/model.py ./models/python_3_6/1/ cp python_backend/builddir/triton_python_backend_stub ./models/python_3_6 conda deactivate +if [[ "${PROPERTIES}" =~ "locale" ]]; then + locale_test $PY36_VERSION_STRING +fi +if [[ "${PROPERTIES}" =~ "extraction" ]]; then + extraction_test +fi +if [[ "${PROPERTIES}" =~ "aws" ]]; then + aws_test +fi + # Test conda env without custom Python backend stub This environment should # always use the default Python version shipped in the container. For Ubuntu # 24.04 it is Python 3.12, for Ubuntu 22.04 is Python 3.10 and for Ubuntu 20.04 @@ -130,186 +348,16 @@ cp ../../python_models/python_version/model.py ./models/python_3_12/1/ conda deactivate rm -rf ./miniconda -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 -fi - -kill $SERVER_PID -wait $SERVER_PID - -set +e -for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY37_VERSION_STRING" "$PY37_1_VERSION_STRING" "$PY312_VERSION_STRING"; do - grep "$EXPECTED_VERSION_STRING" $SERVER_LOG - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. 
\n***" - RET=1 - fi -done - -# Test default (non set) locale in python stub processes -# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to -# stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260. -export LC_ALL=INVALID -grep "Locale is (None, None)" $SERVER_LOG - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Default unset Locale was not found in Triton logs. \n***" - RET=1 - fi -set -e - -rm $SERVER_LOG - -# Test locale set via environment variable in python stub processes -# NOTE: In certain pybind versions, the locale settings may not be propagated from parent to -# stub processes correctly. See https://github.com/triton-inference-server/python_backend/pull/260. -export LC_ALL=C.UTF-8 -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 -fi - -kill $SERVER_PID -wait $SERVER_PID - -set +e -grep "Locale is ('en_US', 'UTF-8')" $SERVER_LOG - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Locale UTF-8 was not found in Triton logs. \n***" - RET=1 - fi -set -e - -rm $SERVER_LOG - -## Test re-extraction of environment. 
-SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --model-control-mode=explicit" -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 -fi - -# The environment should be extracted -curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load -touch -m models/python_3_12/1/model.py -# The environment should not be re-extracted -curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load -touch -m models/python_3_12/python_3_12_environment.tar.gz -# The environment should be re-extracted -curl -v -X POST localhost:8000/v2/repository/models/python_3_12/load - -kill $SERVER_PID -wait $SERVER_PID - -set +e - -PY312_ENV_EXTRACTION="Extracting Python execution env" -if [ `grep -c "${PY312_ENV_EXTRACTION}" ${SERVER_LOG}` != "2" ]; then - cat $SERVER_LOG - echo -e "\n***\n*** Python execution environment should be extracted exactly twice. \n***" - RET=1 +if [[ "${PROPERTIES}" =~ "locale" ]]; then + locale_test $PY312_VERSION_STRING fi -set -e - -# Test execution environments with S3 -# S3 credentials are necessary for this test. Pass via ENV variables -aws configure set default.region $AWS_DEFAULT_REGION && \ - aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID && \ - aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY - -# S3 bucket path (Point to bucket when testing cloud storage) -BUCKET_URL="s3://triton-bucket-${CI_JOB_ID}" - -# Cleanup and delete S3 test bucket if it already exists (due to test failure) -aws s3 rm $BUCKET_URL --recursive --include "*" && \ - aws s3 rb $BUCKET_URL || true - -# Make S3 test bucket -aws s3 mb "${BUCKET_URL}" - -# Remove Slash in BUCKET_URL -BUCKET_URL=${BUCKET_URL%/} -BUCKET_URL_SLASH="${BUCKET_URL}/" - -# Remove Python 3.7 model because it contains absolute paths and cannot be used -# with S3. 
-rm -rf models/python_3_7 - -# Test with the bucket url as model repository -aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*" - -rm $SERVER_LOG - -SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1" -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - aws s3 rb "${BUCKET_URL}" --force || true - exit 1 -fi - -kill $SERVER_PID -wait $SERVER_PID - -set +e -grep "$PY36_VERSION_STRING" $SERVER_LOG -if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** $PY36_VERSION_STRING was not found in Triton logs. \n***" - RET=1 +if [[ "${PROPERTIES}" =~ "extraction" ]]; then + extraction_test fi -set -e - -# Clean up bucket contents -aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*" - -# Test with EXECUTION_ENV_PATH outside the model directory -sed -i "s/TRITON_MODEL_DIRECTORY\/python_3_6_environment/TRITON_MODEL_DIRECTORY\/..\/python_3_6_environment/" models/python_3_6/config.pbtxt -mv models/python_3_6/python_3_6_environment.tar.gz models -sed -i "s/\$\$TRITON_MODEL_DIRECTORY\/python_3_12_environment/s3:\/\/triton-bucket-${CI_JOB_ID}\/python_3_12_environment/" models/python_3_12/config.pbtxt -mv models/python_3_12/python_3_12_environment.tar.gz models - -aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*" - -rm $SERVER_LOG - -SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1" -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - aws s3 rb "${BUCKET_URL}" --force || true - exit 1 +if [[ "${PROPERTIES}" =~ "aws" ]]; then + aws_test fi -kill $SERVER_PID -wait $SERVER_PID - -set +e -for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do - grep "$EXPECTED_VERSION_STRING" $SERVER_LOG - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. 
\n***" - RET=1 - fi -done -set -e - -# Clean up bucket contents and delete bucket -aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*" -aws s3 rb "${BUCKET_URL}" - if [ $RET -eq 0 ]; then echo -e "\n***\n*** Env Manager Test PASSED.\n***" else diff --git a/qa/L0_backend_python/logging/test.sh b/qa/L0_backend_python/logging/test.sh index 174f3e0140..508efa470a 100755 --- a/qa/L0_backend_python/logging/test.sh +++ b/qa/L0_backend_python/logging/test.sh @@ -25,6 +25,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +PROPERTIES=${PROPERTIES:=""} CLIENT_LOG="logging_client.log" TEST_RESULT_FILE="test_results.txt" LOG_TEST="logging_test.py" From cc5d96af62a278018e60cd6fae563615845698b7 Mon Sep 17 00:00:00 2001 From: Kyle McGill Date: Wed, 27 Nov 2024 12:02:03 -0800 Subject: [PATCH 5/6] Adding comment explaining the subtest_properties hash map --- qa/L0_backend_python/test.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index a6f0be0733..61835045a3 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -178,6 +178,17 @@ fi pip3 install pytest requests virtualenv # Set up the tests to execute. +# `subtest_properties` is a hash map of the subtests to run +# and the associated properties, or configurations, for each +# subtest. The list of subtests to run is found by listing the final +# set of keys. The subtest properties are variables which the +# subtest can consume to narrow or widen the scope of the subtest. +# In the same way we can add or remove entries in the hash map +# to include/exclude subtests, we can add or remove properties in +# the value to increase/decrease the scope of the subtest. +# +# <key> = subtest name (also the subdirectory for the subtest) +# <value> = properties which the subtest can consume.
declare -A subtest_properties subtest_properties["shared_memory"]="" subtest_properties["lifecycle"]="" @@ -197,7 +208,7 @@ if [ "$TEST_JETSON" == "0" ]; then subtest_properties["bls"]="" subtest_properties["decoupled"]="" subtest_properties["response_sender"]="" - subtest_properties["env"]="locale" + subtest_properties["env"]="locale extraction aws" fi # If not running on windows add these From deb693b3eb44440c9cbb324c4ff5173d15e0d7b0 Mon Sep 17 00:00:00 2001 From: Kyle McGill Date: Wed, 27 Nov 2024 12:06:55 -0800 Subject: [PATCH 6/6] Fix failing tests for review --- qa/L0_backend_python/env/test.sh | 69 ++++++++++++++------------------ 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/qa/L0_backend_python/env/test.sh b/qa/L0_backend_python/env/test.sh index 8a760536f3..99de2009a9 100755 --- a/qa/L0_backend_python/env/test.sh +++ b/qa/L0_backend_python/env/test.sh @@ -96,6 +96,7 @@ locale_test() { set -e } +# This is only run for the default installation of python version, 3.12. extraction_test() { ## Test re-extraction of environment. SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --model-control-mode=explicit" @@ -131,6 +132,9 @@ extraction_test() { } aws_test() { + local EXPECTED_VERSION_STRING=$1 + local MODEL_TYPE=$2 # Can be one of: python_3_6, python_3_12 + # Test execution environments with S3 # S3 credentials are necessary for this test. Pass via ENV variables aws configure set default.region $AWS_DEFAULT_REGION && \ @@ -151,10 +155,6 @@ aws_test() { BUCKET_URL=${BUCKET_URL%/} BUCKET_URL_SLASH="${BUCKET_URL}/" - # Remove Python 3.7 model because it contains absolute paths and cannot be used - # with S3. - rm -rf models/python_3_7 - # Test with the bucket url as model repository aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*" @@ -172,10 +172,10 @@ aws_test() { wait $SERVER_PID set +e - grep "$PY36_VERSION_STRING" $SERVER_LOG + grep "$EXPECTED_VERSION_STRING" $SERVER_LOG if [ $? 
-ne 0 ]; then cat $SERVER_LOG - echo -e "\n***\n*** $PY36_VERSION_STRING was not found in Triton logs. \n***" + echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***" RET=1 fi set -e @@ -184,11 +184,14 @@ aws_test() { aws s3 rm "${BUCKET_URL_SLASH}" --recursive --include "*" # Test with EXECUTION_ENV_PATH outside the model directory - sed -i "s/TRITON_MODEL_DIRECTORY\/python_3_6_environment/TRITON_MODEL_DIRECTORY\/..\/python_3_6_environment/" models/python_3_6/config.pbtxt - mv models/python_3_6/python_3_6_environment.tar.gz models - sed -i "s/\$\$TRITON_MODEL_DIRECTORY\/python_3_12_environment/s3:\/\/triton-bucket-${CI_JOB_ID}\/python_3_12_environment/" models/python_3_12/config.pbtxt - mv models/python_3_12/python_3_12_environment.tar.gz models - + if [[ "$MODEL_TYPE" == "python_3_6" ]]; then + sed -i "s/TRITON_MODEL_DIRECTORY\/python_3_6_environment/TRITON_MODEL_DIRECTORY\/..\/python_3_6_environment/" models/python_3_6/config.pbtxt + mv models/python_3_6/python_3_6_environment.tar.gz models + elif [[ "$MODEL_TYPE" == "python_3_12" ]]; then + sed -i "s/\$\$TRITON_MODEL_DIRECTORY\/python_3_12_environment/s3:\/\/triton-bucket-${CI_JOB_ID}\/python_3_12_environment/" models/python_3_12/config.pbtxt + mv models/python_3_12/python_3_12_environment.tar.gz models + fi + aws s3 cp models/ "${BUCKET_URL_SLASH}" --recursive --include "*" SERVER_ARGS="--model-repository=$BUCKET_URL_SLASH --log-verbose=1" @@ -205,14 +208,12 @@ aws_test() { wait $SERVER_PID set +e - for EXPECTED_VERSION_STRING in "$PY36_VERSION_STRING" "$PY312_VERSION_STRING"; do - grep "$EXPECTED_VERSION_STRING" $SERVER_LOG - if [ $? -ne 0 ]; then - cat $SERVER_LOG - echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. \n***" - RET=1 - fi - done + grep "$EXPECTED_VERSION_STRING" $SERVER_LOG + if [ $? -ne 0 ]; then + cat $SERVER_LOG + echo -e "\n***\n*** $EXPECTED_VERSION_STRING was not found in Triton logs. 
\n***" + RET=1 + fi set -e # Clean up bucket contents and delete bucket @@ -240,6 +241,7 @@ PY37_VERSION_STRING="Python version is 3.7, NumPy version is 1.20.1, and Tensorf create_python_backend_stub conda-pack -o python3.7.tar.gz path_to_conda_pack=`pwd`/python3.7.tar.gz +rm -r models/ || true mkdir -p models/python_3_7/1/ cp ../../python_models/python_version/config.pbtxt ./models/python_3_7 (cd models/python_3_7 && \ @@ -252,12 +254,6 @@ conda deactivate if [[ "${PROPERTIES}" =~ "locale" ]]; then locale_test $PY37_VERSION_STRING fi -if [[ "${PROPERTIES}" =~ "extraction" ]]; then - extraction_test -fi -if [[ "${PROPERTIES}" =~ "aws" ]]; then - aws_test -fi # Use python-3-7 without conda pack @@ -273,6 +269,7 @@ conda install -c conda-forge libstdcxx-ng=14 -y PY37_1_VERSION_STRING="Python version is 3.7, NumPy version is 1.20.3, and Tensorflow version is 2.1.0" create_python_backend_stub +rm -r models/ || true mkdir -p models/python_3_7_1/1/ cp ../../python_models/python_version/config.pbtxt ./models/python_3_7_1 (cd models/python_3_7_1 && \ @@ -285,14 +282,11 @@ cp python_backend/builddir/triton_python_backend_stub ./models/python_3_7_1 conda deactivate if [[ "${PROPERTIES}" =~ "locale" ]]; then - locale_test $PY37_1_VERSION_STRING -fi -if [[ "${PROPERTIES}" =~ "extraction" ]]; then - extraction_test -fi -if [[ "${PROPERTIES}" =~ "aws" ]]; then - aws_test + locale_test "$PY37_1_VERSION_STRING" fi +# if [[ "${PROPERTIES}" =~ "aws" ]]; then +# aws_test "$PY37_1_VERSION_STRING" "python_3_7_1" +# fi # Create a model with python 3.6 version # Tensorflow 2.1.0 only works with Python 3.4 - 3.7. 
Successful execution of @@ -307,6 +301,7 @@ conda-pack -o python3.6.tar.gz # Test relative execution env path path_to_conda_pack='$$TRITON_MODEL_DIRECTORY/python_3_6_environment.tar.gz' create_python_backend_stub +rm -r models/ || true mkdir -p models/python_3_6/1/ cp ../../python_models/python_version/config.pbtxt ./models/python_3_6 cp python3.6.tar.gz models/python_3_6/python_3_6_environment.tar.gz @@ -320,11 +315,8 @@ conda deactivate if [[ "${PROPERTIES}" =~ "locale" ]]; then locale_test $PY36_VERSION_STRING fi -if [[ "${PROPERTIES}" =~ "extraction" ]]; then - extraction_test -fi if [[ "${PROPERTIES}" =~ "aws" ]]; then - aws_test + aws_test "$PY36_VERSION_STRING" "python_3_6" fi # Test conda env without custom Python backend stub This environment should @@ -338,6 +330,7 @@ conda install numpy=1.26.4 -y conda install tensorflow=2.16.2 -y PY312_VERSION_STRING="Python version is 3.12, NumPy version is 1.26.4, and Tensorflow version is 2.16.2" conda pack -o python3.12.tar.gz +rm -r models/ || true mkdir -p models/python_3_12/1/ cp ../../python_models/python_version/config.pbtxt ./models/python_3_12 cp python3.12.tar.gz models/python_3_12/python_3_12_environment.tar.gz @@ -352,10 +345,10 @@ if [[ "${PROPERTIES}" =~ "locale" ]]; then locale_test $PY312_VERSION_STRING fi if [[ "${PROPERTIES}" =~ "extraction" ]]; then - extraction_test + extraction_test fi if [[ "${PROPERTIES}" =~ "aws" ]]; then - aws_test + aws_test "$PY312_VERSION_STRING" "python_3_12" fi if [ $RET -eq 0 ]; then