diff --git a/.github/workflows/ci_examples.yml b/.github/workflows/ci_examples.yml index 12ed94ef..1a10bde3 100644 --- a/.github/workflows/ci_examples.yml +++ b/.github/workflows/ci_examples.yml @@ -22,13 +22,41 @@ on: - main - r* +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: + filter_examples: + # Dynamic matrix trick inspired by https://www.cynkra.com/blog/2020-12-23-dynamic-gha/ + runs-on: ubuntu-latest + timeout-minutes: 60 + outputs: + projects: ${{ steps.set-matrix.outputs.projects }} + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Get Changed Files + id: changed_files + uses: trilom/file-changes-action@v1.2.3 + with: + output: json + - name: Filter example projects + id: set-matrix + run: | + echo "::set-output name=projects::$(python ./.github/workflows/filter_examples.py $HOME/files.json)" ci-examples: runs-on: ubuntu-latest + needs: filter_examples timeout-minutes: 60 strategy: + # Test for each project in parallel using ci_max and ci_min to ensure + # tested in range of tfx/tensorflow supported versions matrix: - project: [sklearn_penguins, xgboost_penguins] + project: ${{fromJson(needs.filter_examples.outputs.projects)}} steps: - uses: actions/checkout@v2 - name: Set up Python 3.7 diff --git a/.github/workflows/filter_examples.py b/.github/workflows/filter_examples.py new file mode 100644 index 00000000..0ae735ee --- /dev/null +++ b/.github/workflows/filter_examples.py @@ -0,0 +1,112 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# NB(casassg): Files that, if changed, should trigger running CI for all
# examples. These are core files and we want to avoid causing outages because
# of them.
RUN_ALL_FILES = [
    ".github/workflows/ci_examples.yml",
    # The selection script is a Python file; the former ".yml" spelling never
    # matched any path, so edits to the script did not trigger a full run.
    ".github/workflows/filter_examples.py",
    # filter_examples.py delegates the tfx_addons lookup to this module.
    ".github/workflows/filter_projects.py",
]


def _get_testable_examples(examples_dir: str = "") -> List[str]:
  """Return the example projects that CI is able to test.

  A sub-folder of the examples directory is considered testable when it
  contains both a `requirements.txt` file and at least one pytest module,
  i.e. a file whose name ends in `_test.py` (the naming convention stated in
  CONTRIBUTING.md).

  Args:
    examples_dir: Directory that holds one folder per example. When empty,
      `<BASE_DIR>/examples` is used.

  Returns:
    The names of the testable example folders, sorted alphabetically so that
    the generated CI matrix is stable between runs.
  """
  root = examples_dir or os.path.join(BASE_DIR, "examples")

  projects = []
  # os.listdir returns entries in arbitrary order, hence the explicit sort.
  for project in sorted(os.listdir(root)):
    project_dir = os.path.join(root, project)
    # Plain files living next to the examples (README, ...) fail this check
    # as well, so they are never listed as projects.
    if not os.path.isfile(os.path.join(project_dir, "requirements.txt")):
      continue
    # A bare substring match on "test" would also accept helper modules such
    # as `latest_utils.py`, for which pytest collects nothing.
    if any(name.endswith("_test.py") for name in os.listdir(project_dir)):
      projects.append(project)

  return projects
def _get_affected_examples(affected_files: List[str]) -> List[str]:
  """Select the example projects that CI should run for a set of changes.

  An example is selected when either:
    * one of the changed files lives under `examples/<example>/`, or
    * its `requirements.txt` depends on a `tfx_addons` extra (written as
      `../..[extra]`) that `filter_projects.get_affected_projects` reports as
      affected by the change.
  If any file listed in `RUN_ALL_FILES` changed, every testable example is
  returned instead.

  Args:
    affected_files: Repository-relative paths of the changed files.

  Returns:
    Names of the example folders to test. Apart from the run-all shortcut,
    the names are sorted so the resulting CI matrix is deterministic.
  """
  logging.info("Found affected files: %s", affected_files)
  testable_examples = _get_testable_examples()
  logging.info("Found %s testable example folders", testable_examples)
  for run_all_file in RUN_ALL_FILES:
    if run_all_file in affected_files:
      logging.warning("Found change in %s, running all projects", run_all_file)
      return testable_examples

  examples_to_test = set()
  for file in affected_files:
    # Compare whole path components: a plain string prefix check would also
    # pick up siblings such as `examples_old/...` or `examples.md`.
    components = file.split("/")
    if len(components) < 2 or components[0] != "examples":
      continue
    example = components[1]
    if example in testable_examples:
      logging.info("Package %s is marked for testing", example)
      examples_to_test.add(example)
    else:
      logging.warning("Example %s is not testable, skipping", example)

  affected_tfxa_projects = filter_projects.get_affected_projects(
      affected_files)
  for project in testable_examples:
    requirements_path = os.path.join(BASE_DIR, "examples", project,
                                     "requirements.txt")
    with open(requirements_path, encoding="utf-8") as f2:
      # Examples reference the local package as `../..[extra]`; rewrite it to
      # a named requirement so that pkg_resources can expose the extras.
      requirements = [
          line.strip().replace("../..", "tfx_addons") for line in f2
      ]
    logging.info("Found %s requirements for example %s", requirements, project)
    for req in pkg_resources.parse_requirements(requirements):
      if req.unsafe_name != "tfx_addons":
        continue
      for extra in req.extras:
        if extra in affected_tfxa_projects:
          logging.info("Example %s depends on tfx_addons.%s running in CI.",
                       project, extra)
          examples_to_test.add(project)
  return sorted(examples_to_test)
if __name__ == "__main__":
  # Usage: filter_examples.py <file_manifest>
  # <file_manifest> is a JSON file holding the list of changed paths; the
  # examples selected for CI are written to stdout as a JSON array.
  cli = argparse.ArgumentParser()
  cli.add_argument("file_manifest")
  options = cli.parse_args()

  with open(options.file_manifest, "r") as manifest:
    changed_files = json.load(manifest)
  print(json.dumps(_get_affected_examples(changed_files)))
print(json.dumps(affected_components)) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e68d1baf..c3f5c0a1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -99,8 +99,8 @@ SIG team members will be assigned to review your pull requests. Once the pull re Each project specifies it's own Python dependencies depending on what folder it lives under: -- **examples/ projects**: Those need to provide a `requirements.txt` in the root of their folder. Example: `examples/xgboost_penguins/requirements.txt`. If you want your example to be executed as part of CI, you will also need to modify [ci_examples.yml](https://github.com/tensorflow/tfx-addons/blob/main/.github/workflows/ci_examples.yml#L31) and add the name of your `examples/{project_name}` to the `projects` array. You can depend on a `tfx_addons` project by using `../..[project_name]` in your `requirements.txt` file. -- **tfx_addons/ projects**: In order for project to be included in release and be tested, you will need to specify dependencies in [tfx_addons/version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py) `_PKG_METADATA` where key is the project name (aka tfx_addons/{project_name}) and value is a list of requirements strings needed for your component. Once added, this will automatically be picked up by CI and will automatically include your project into the tfx-addons release. In addition, your project will be added to the `tfx_addons.{project_name}` namespace, such that it can be used: +* **Projects in `examples/`**: Those need to provide a `requirements.txt` in the root of their folder. Example: `examples/xgboost_penguins/requirements.txt`. You can depend on a `tfx_addons` project by using `../..[project_name]` in your `requirements.txt` file. 
+* **Projects in `tfx_addons/`**: In order for a project to be included in the release and be tested, you will need to specify dependencies in [tfx_addons/version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py) `_PKG_METADATA` where the key is the project name (aka tfx_addons/{project_name}) and the value is a list of requirement strings needed for your component. Once added, this will automatically be picked up by CI and will automatically include your project into the tfx-addons release. In addition, your project will be added to the `tfx_addons.{project_name}` namespace, such that it can be used: ```python @@ -109,6 +109,7 @@ import tfx_addons as tfxa tfxa.project_name ``` +Note that CI runs on `pytest`; see _Testing your code_ below to check how to create tests for your code. ### Development tips @@ -168,3 +169,5 @@ We use pytest to run tests. You can run tests locally using: - Choose component to develop: `export COMPONENT_NAME=mlmd_client` (replace with the component you will be developing) - Install test packages: `pip install -e ".[$COMPONENT_NAME,test]"` - Run tests: `python -m pytest tfx_addons/$COMPONENT_NAME` + +Note that only files that end with `_test.py` will be recognized as tests. Learn more about writing pytest tests in the [pytest docs](https://docs.pytest.org/en/latest/getting-started.html#create-your-first-test).