From 8fbe07cacb2f93e3ed2a604cd4b17a5321035483 Mon Sep 17 00:00:00 2001 From: Diego Lovison Date: Tue, 27 Aug 2024 15:25:02 -0300 Subject: [PATCH] Fix: Basic sample tests - sequential is flaky (#11138) Signed-off-by: Diego Lovison --- .github/workflows/e2e-test.yaml | 7 +- scripts/deploy/github/build-images.sh | 1 + scripts/deploy/github/forward-port.sh | 1 + test/sample-test/requirements.in | 10 +- test/sample-test/requirements.txt | 220 ++++++++++++----------- test/sample-test/run_sample_test.py | 41 ++--- test/sample-test/sample_test_launcher.py | 15 +- 7 files changed, 154 insertions(+), 141 deletions(-) diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index daef2ae49a73..bc6d1d1126e8 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -150,11 +150,14 @@ jobs: - name: Forward API port run: ./scripts/deploy/github/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888 + - name: Install prerequisites + run: pip3 install -r ./test/sample-test/requirements.txt + - name: Basic sample tests - sequential - run: pip3 install -r ./test/sample-test/requirements.txt && pip3 install kfp~=2.0 && python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name sequential --results-gcs-dir output + run: python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name sequential --results-gcs-dir output - name: Basic sample tests - exit_handler - run: pip3 install -r ./test/sample-test/requirements.txt && pip3 install kfp~=2.0 && python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name exit_handler --results-gcs-dir output + run: python3 ./test/sample-test/sample_test_launcher.py sample_test run_test --namespace kubeflow --test-name exit_handler --results-gcs-dir output - name: Collect test results if: always() diff --git a/scripts/deploy/github/build-images.sh b/scripts/deploy/github/build-images.sh index ffa6db2e216f..bce5a1d6d03b 100755 --- a/scripts/deploy/github/build-images.sh +++ b/scripts/deploy/github/build-images.sh @@ -19,6 +19,7 @@ set -e REGISTRY="${REGISTRY:-kind-registry:5000}" +echo "REGISTRY=$REGISTRY" TAG="${TAG:-latest}" docker system prune -a -f diff --git a/scripts/deploy/github/forward-port.sh b/scripts/deploy/github/forward-port.sh index 416958c764c9..5423439f3394 100755 --- a/scripts/deploy/github/forward-port.sh +++ b/scripts/deploy/github/forward-port.sh @@ -24,6 +24,7 @@ LOCAL_PORT=$3 REMOTE_PORT=$4 POD_NAME=$(kubectl get pods -n "$KUBEFLOW_NS" -l "app=$APP_NAME" -o jsonpath='{.items[0].metadata.name}') +echo "POD_NAME=$POD_NAME" if [ $QUIET -eq 1 ]; then kubectl port-forward -n "$KUBEFLOW_NS" "$POD_NAME" "$LOCAL_PORT:$REMOTE_PORT" > /dev/null 2>&1 & diff --git a/test/sample-test/requirements.in b/test/sample-test/requirements.in index b98dc7e37713..2d48979e5b68 100644 --- a/test/sample-test/requirements.in +++ b/test/sample-test/requirements.in @@ -1,12 +1,8 @@ +kfp==2.8.0 junit-xml minio -black==21.7b0 -papermill~=2.2 +black +papermill fire yamale kubernetes - -google-cloud-bigquery<3 -google-cloud-storage<2 -# TODO: remove deprecated dependency --r sdk/python/requirements-deprecated.txt diff --git a/test/sample-test/requirements.txt b/test/sample-test/requirements.txt index 42322bcaa95c..c52407539db0 100644 --- a/test/sample-test/requirements.txt +++ b/test/sample-test/requirements.txt @@ -1,194 +1,210 @@ # -# This file is autogenerated by pip-compile -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # -# pip-compile --output-file=- - +# pip-compile # -ansiwrap==0.8.4 +ansicolors==1.1.8 # via papermill -appdirs==1.4.4 - # via black -attrs==20.3.0 - # via jsonschema -black==21.7b0 +argon2-cffi==23.1.0 + # via minio +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +attrs==24.2.0 # via - # -r requirements.in - # papermill -cachetools==4.2.4 + # jsonschema + # referencing +black==24.8.0 + # via -r requirements.in +cachetools==5.5.0 # via google-auth -certifi==2021.10.8 +certifi==2024.7.4 # via + # kfp-server-api # kubernetes # minio # requests -charset-normalizer==2.0.9 +cffi==1.17.0 + # via argon2-cffi-bindings +charset-normalizer==3.3.2 # via requests -click==7.1.2 +click==8.1.7 # via # black + # kfp # papermill -entrypoints==0.3 - # via - # jupyter-client - # papermill -fire==0.4.0 +docstring-parser==0.16 + # via kfp +entrypoints==0.4 + # via papermill +fastjsonschema==2.20.0 + # via nbformat +fire==0.6.0 # via -r requirements.in -google-api-core[grpc]==1.31.5 +google-api-core==2.19.1 # via - # google-cloud-bigquery # google-cloud-core # google-cloud-storage -google-auth==1.35.0 + # kfp +google-auth==2.34.0 # via # google-api-core # google-cloud-core # google-cloud-storage + # kfp # kubernetes -google-cloud-bigquery==2.31.0 - # via -r requirements.in -google-cloud-core==1.7.2 +google-cloud-core==2.4.1 + # via google-cloud-storage +google-cloud-storage==2.18.2 + # via kfp +google-crc32c==1.5.0 # via - # google-cloud-bigquery # google-cloud-storage -google-cloud-storage==1.43.0 - # via -r requirements.in -google-crc32c==1.3.0 - # via google-resumable-media -google-resumable-media==2.1.0 - # via - # google-cloud-bigquery - # google-cloud-storage -googleapis-common-protos==1.54.0 + # google-resumable-media +google-resumable-media==2.7.2 + # via google-cloud-storage +googleapis-common-protos==1.64.0 # via google-api-core -grpcio==1.43.0 - # via - # google-api-core - # google-cloud-bigquery -idna==3.3 +idna==3.8 # via requests -ipython-genutils==0.2.0 - # via nbformat -jsonschema==3.2.0 +jsonschema==4.23.0 # via nbformat +jsonschema-specifications==2023.12.1 + # via jsonschema junit-xml==1.9 # via -r requirements.in -jupyter-client==7.1.0 +jupyter-client==8.6.2 # via nbclient -jupyter-core==4.11.2 +jupyter-core==5.7.2 # via # jupyter-client + # nbclient # nbformat -kubernetes==12.0.1 +kfp==2.8.0 # via -r requirements.in -minio==7.1.2 +kfp-pipeline-spec==0.3.0 + # via kfp +kfp-server-api==2.0.5 + # via kfp +kubernetes==26.1.0 + # via + # -r requirements.in + # kfp +minio==7.2.8 # via -r requirements.in -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via black -nbclient==0.5.9 +nbclient==0.10.0 # via papermill -nbformat==5.1.3 +nbformat==5.10.4 # via # nbclient # papermill -nest-asyncio==1.5.4 - # via - # jupyter-client - # nbclient -oauthlib==3.1.1 +oauthlib==3.2.2 # via requests-oauthlib -packaging==20.9 - # via - # google-api-core - # google-cloud-bigquery -papermill==2.3.3 +packaging==24.1 + # via black +papermill==2.6.0 # via -r requirements.in -pathspec==0.9.0 +pathspec==0.12.1 # via black -proto-plus==1.19.8 - # via google-cloud-bigquery -protobuf==3.19.1 +platformdirs==4.2.2 + # via + # black + # jupyter-core +proto-plus==1.24.0 + # via google-api-core +protobuf==4.25.4 # via # google-api-core - # google-cloud-bigquery - # google-cloud-storage # googleapis-common-protos + # kfp + # kfp-pipeline-spec # proto-plus -pyasn1==0.4.8 +pyasn1==0.6.0 # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 +pyasn1-modules==0.4.0 # via google-auth -pyparsing==2.4.7 - # via packaging -pyrsistent==0.18.0 - # via jsonschema -python-dateutil==2.8.2 +pycparser==2.22 + # via cffi +pycryptodome==3.20.0 + # via minio +python-dateutil==2.9.0.post0 # via - # google-cloud-bigquery # jupyter-client + # kfp-server-api # kubernetes -pytz==2021.3 - # via google-api-core -pyyaml==5.4.1 +pyyaml==6.0.2 # via + # kfp # kubernetes # papermill # yamale -pyzmq==22.3.0 +pyzmq==26.2.0 # via jupyter-client -regex==2021.11.10 - # via black -requests==2.26.0 +referencing==0.35.1 + # via + # jsonschema + # jsonschema-specifications +requests==2.32.3 # via # google-api-core - # google-cloud-bigquery # google-cloud-storage # kubernetes # papermill # requests-oauthlib -requests-oauthlib==1.3.0 + # requests-toolbelt +requests-oauthlib==2.0.0 # via kubernetes -rsa==4.8 +requests-toolbelt==0.10.1 + # via kfp +rpds-py==0.20.0 + # via + # jsonschema + # referencing +rsa==4.9 # via google-auth six==1.15.0 # via # fire - # google-api-core - # google-auth - # google-cloud-core - # google-cloud-storage - # grpcio - # jsonschema # junit-xml + # kfp-server-api # kubernetes # python-dateutil -tenacity==8.0.1 +tabulate==0.9.0 + # via kfp +tenacity==9.0.0 # via papermill -termcolor==1.1.0 +termcolor==2.4.0 # via fire -textwrap3==0.9.2 - # via ansiwrap -tomli==1.2.3 +tomli==2.0.1 # via black -tornado==6.1 +tornado==6.4.1 # via jupyter-client -tqdm==4.62.3 +tqdm==4.66.5 # via papermill -traitlets==5.1.1 +traitlets==5.14.3 # via # jupyter-client # jupyter-core # nbclient # nbformat -urllib3==1.26.7 +typing-extensions==4.12.2 + # via + # black + # minio +urllib3==1.26.19 # via + # kfp + # kfp-server-api # kubernetes # minio # requests -websocket-client==1.2.3 +websocket-client==1.8.0 # via kubernetes -yamale==4.0.2 +yamale==5.2.1 # via -r requirements.in # The following packages are considered to be unsafe in a requirements file: diff --git a/test/sample-test/run_sample_test.py b/test/sample-test/run_sample_test.py index 1a77d2fd0504..2c9410fd148b 100644 --- a/test/sample-test/run_sample_test.py +++ b/test/sample-test/run_sample_test.py @@ -121,8 +121,7 @@ def run(self): yamlerr)) except OSError as ose: print( - 'Config file with the same name not found, use default args:{}' - .format(ose)) + f'Config file "{config_file}" not found, using default args: {raw_args}') else: if 'arguments' in raw_args.keys() and raw_args['arguments']: self._test_args.update(raw_args['arguments']) @@ -153,27 +152,17 @@ def check(self): """Check pipeline run results.""" if self._run_pipeline: ###### Monitor Job ###### - try: - start_time = datetime.now() - response = self._client.wait_for_run_completion( - self._run_id, self._test_timeout) - succ = (response.state.lower() == 'succeeded') - end_time = datetime.now() - elapsed_time = (end_time - start_time).seconds - utils.add_junit_test(self._test_cases, 'job completion', succ, - 'waiting for job completion failure', - elapsed_time) - finally: - # TODO(chensun): print log for debugging - pass - - if not succ: - utils.write_junit_xml(self._test_name, self._result, - self._test_cases) - exit(1) - - ###### Delete Job ###### - #TODO: add deletion when the backend API offers the interface. - - ###### Write out the test result in junit xml ###### - utils.write_junit_xml(self._test_name, self._result, self._test_cases) + start_time = datetime.now() + response = self._client.wait_for_run_completion(self._run_id, self._test_timeout) + succ = (response.state.lower() == 'succeeded') + end_time = datetime.now() + elapsed_time = (end_time - start_time).seconds + utils.add_junit_test(self._test_cases, 'job completion', succ, + 'waiting for job completion failure', + elapsed_time) + print(f'Pipeline {"worked" if succ else "Failed"}. Elapsed time: {elapsed_time}s') + + ###### Delete Job ###### + #TODO: add deletion when the backend API offers the interface. + + assert succ diff --git a/test/sample-test/sample_test_launcher.py b/test/sample-test/sample_test_launcher.py index f503194acd49..3de1c06d7be9 100644 --- a/test/sample-test/sample_test_launcher.py +++ b/test/sample-test/sample_test_launcher.py @@ -71,7 +71,9 @@ def __init__(self, raise RuntimeError( 'Failed to get inverse proxy hostname') from err - print('KFP API host is %s' % self._host) + # With the healthz API in place, when the developer clicks the link, + # it will lead to a functional URL instead of a 404 error. + print(f'KFP API healthz endpoint is: {self._host}/apis/v1beta1/healthz') self._is_notebook = None self._work_dir = os.path.join(BASE_DIR, 'samples/core/', @@ -88,7 +90,9 @@ def _compile(self): # Looking for the entry point of the test. list_of_files = os.listdir('.') for file in list_of_files: - m = re.match(self._test_name + '\.[a-zA-Z]+', file) + # matching by .py or .ipynb, there will be yaml ( compiled ) files in the folder. + # if you rerun the test suite twice, the test suite will fail + m = re.match(self._test_name + '\.(py|ipynb)$', file) if m: file_name, ext_name = os.path.splitext(file) if self._is_notebook is not None: @@ -156,13 +160,16 @@ def _compile(self): parameters=nb_params, prepare_only=True) # Convert to python script. - subprocess.call([ + return_code = subprocess.call([ 'jupyter', 'nbconvert', '--to', 'python', '%s.ipynb' % self._test_name ]) else: - subprocess.call(['python3', '%s.py' % self._test_name]) + return_code = subprocess.call(['python3', '%s.py' % self._test_name]) + + # Command executed successfully! + assert return_code == 0 def _injection(self): """Inject images for pipeline components.