Skip to content

Commit

Permalink
Improve examples CI to automatically pick up projects (#166)
Browse files Browse the repository at this point in the history
* make ci_examples run only when needed

* remove non used init file

* only run those examples that have test files

* address comments and improve documentation

* add concurrency for ci-examples
  • Loading branch information
casassg authored Aug 22, 2022
1 parent 5823067 commit f18614c
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 9 deletions.
30 changes: 29 additions & 1 deletion .github/workflows/ci_examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,41 @@ on:
- main
- r*

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
filter_examples:
  # Dynamic matrix trick inspired by https://www.cynkra.com/blog/2020-12-23-dynamic-gha/
  # Computes the JSON list of example projects affected by the change and
  # exposes it as the `projects` output, which the ci-examples job reads via
  # `needs.filter_examples.outputs.projects` to build its matrix.
  runs-on: ubuntu-latest
  timeout-minutes: 60
  outputs:
    projects: ${{ steps.set-matrix.outputs.projects }}
  steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.7
      uses: actions/setup-python@v2
      with:
        python-version: 3.7
    - name: Get Changed Files
      id: changed_files
      uses: trilom/file-changes-action@v1.2.4
      with:
        output: json
    - name: Filter example projects
      id: set-matrix
      # filter_examples.py prints the JSON list on stdout (its logging goes
      # to stderr), so the command substitution captures only the JSON.
      # Written to $GITHUB_OUTPUT because the `::set-output` workflow command
      # is deprecated.
      run: |
        echo "projects=$(python ./.github/workflows/filter_examples.py "$HOME/files.json")" >> "$GITHUB_OUTPUT"
ci-examples:
runs-on: ubuntu-latest
needs: filter_examples
timeout-minutes: 60
strategy:
# Test for each project in parallel using ci_max and ci_min to ensure
# tested in range of tfx/tensorflow supported versions
matrix:
project: [sklearn_penguins, xgboost_penguins]
project: ${{fromJson(needs.filter_examples.outputs.projects)}}
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
Expand Down
112 changes: 112 additions & 0 deletions .github/workflows/filter_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Internal script to parse changed files and potential examples and returns the overlap"""

import argparse
import json
import logging
import os
import sys
from typing import List

import pkg_resources

# Add the `.github` directory to sys.path so that the sibling script
# `workflows/filter_projects.py` can be imported as `workflows.filter_projects`.
# BASE_DIR is obtained by applying dirname three times to this file's absolute
# path, i.e. the repository root given that this file lives in
# `.github/workflows/`.
BASE_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.join(BASE_DIR, ".github"))

# This import must come after the sys.path change above, hence the pylint
# suppression.
from workflows import filter_projects # pylint: disable=wrong-import-position

# Set the root logger to INFO so the logging.info calls in this script are
# emitted.
logging.getLogger().setLevel(logging.INFO)

# NB(casassg): Files that, if changed, should trigger running CI for all
# examples. These are core files, and we want to avoid causing outages
# because of them.
RUN_ALL_FILES = [
    # Workflow definition that runs the examples.
    ".github/workflows/ci_examples.yml",
    # This script: a change to the filtering logic must re-test every example.
    ".github/workflows/filter_examples.py",
]


def _get_testable_examples() -> List[str]:
  """Get the example projects that CI is able to test.

  A folder under `examples/` is considered testable when it contains both a
  `requirements.txt` file and at least one Python file whose name ends in
  `_test.py` (the naming convention CONTRIBUTING.md documents for tests).

  Returns:
    Sorted list of testable example folder names, so that the output is
    deterministic across runs and file systems.
  """
  examples_dir = os.path.join(BASE_DIR, "examples")
  projects = []
  for project in sorted(os.listdir(examples_dir)):
    project_dir = os.path.join(examples_dir, project)
    # Entries without a requirements.txt (including plain files such as a
    # README directly under examples/) cannot be installed by CI.
    if not os.path.isfile(os.path.join(project_dir, "requirements.txt")):
      continue
    # Match the test-file suffix rather than any name containing "test", so
    # that e.g. "latest_utils.py" does not mark a project as testable.
    if any(name.endswith("_test.py") for name in os.listdir(project_dir)):
      projects.append(project)
  return projects


def _get_affected_examples(affected_files: List[str]) -> List[str]:
  """Find which example projects CI should run for a set of changed files.

  An example is selected when any of the following holds:
    * a file listed in RUN_ALL_FILES changed (every testable example is
      returned),
    * a file inside `examples/<example>/` changed and the example is testable,
    * the example's `requirements.txt` depends on a `tfx_addons` extra that
      `filter_projects.get_affected_projects` reports as affected.

  Args:
    affected_files: Paths of the changed files, relative to the repository
      root and using "/" as separator.

  Returns:
    Sorted list of example folder names to test.
  """
  logging.info("Found affected files: %s", affected_files)
  testable_examples = _get_testable_examples()
  logging.info("Found %s testable example folders", testable_examples)
  for run_all_file in RUN_ALL_FILES:
    if run_all_file in affected_files:
      logging.warning("Found change in %s, running all projects", run_all_file)
      return sorted(testable_examples)

  examples_to_test = set()
  for path in affected_files:
    # Compare the first path component exactly, so that look-alike folders
    # (e.g. "examples_old/") are ignored and nested "examples/" segments
    # further down the path are not stripped by accident.
    top_level, _, remainder = path.partition("/")
    if top_level != "examples" or not remainder:
      continue
    file_component = remainder.split("/", maxsplit=1)[0]
    if file_component in testable_examples:
      logging.info("Package %s is marked for testing", file_component)
      examples_to_test.add(file_component)
    else:
      logging.warning("Example %s is not testable, skipping", file_component)

  affected_tfxa_projects = filter_projects.get_affected_projects(
      affected_files)
  for project in testable_examples:
    requirements_path = os.path.join(BASE_DIR, "examples", project,
                                     "requirements.txt")
    with open(requirements_path, encoding="utf-8") as f2:
      # Examples reference the local package as "../..[extra]"; rewrite it to
      # a named requirement so the extras can be parsed below. strip() also
      # drops a trailing "\r" from files with Windows line endings.
      requirements = [
          line.strip().replace("../..", "tfx_addons") for line in f2
      ]
    logging.info("Found %s requirements for example %s", requirements, project)
    for req in pkg_resources.parse_requirements(requirements):
      if req.unsafe_name != "tfx_addons":
        continue
      for extra in req.extras:
        if extra in affected_tfxa_projects:
          logging.info("Example %s depends on tfx_addons.%s running in CI.",
                       project, extra)
          examples_to_test.add(project)
  # Sort so the generated CI matrix is stable between runs.
  return sorted(examples_to_test)


if __name__ == "__main__":
  # CLI entry point: read the JSON manifest of changed files given as the
  # single positional argument and print the affected examples as JSON.
  cli = argparse.ArgumentParser()
  cli.add_argument("file_manifest")
  manifest_path = cli.parse_args().file_manifest

  with open(manifest_path, "r") as manifest:
    changed_files = json.load(manifest)

  print(json.dumps(_get_affected_examples(changed_files)))
12 changes: 6 additions & 6 deletions .github/workflows/filter_projects.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -26,7 +26,8 @@
# These are core files, and we want to avoid causing outages
# because of them
RUN_ALL_FILES = [
"tfx_addons/version.py", "setup.py", ".github/workflows/ci.yml"
"tfx_addons/version.py", "setup.py", ".github/workflows/ci.yml",
"pyproject.toml"
]


Expand All @@ -42,12 +43,12 @@ def _get_testable_projects() -> List[str]:
return list(context["_PKG_METADATA"].keys())


def _get_affected_projects(affected_files: List[str],
testable_projects: List[str]) -> List[str]:
def get_affected_projects(affected_files: List[str]) -> List[str]:
"""Given a list of affected files, and projects that can be tested,
find what projects should CI run"""

logging.info("Found affected files: %s", affected_files)
testable_projects = _get_testable_projects()
for run_all_file in RUN_ALL_FILES:
if run_all_file in affected_files:
logging.warning("Found change in %s, running all projects", run_all_file)
Expand All @@ -74,6 +75,5 @@ def _get_affected_projects(affected_files: List[str],
args = parser.parse_args()

with open(args.file_manifest, "r") as f:
affected_components = _get_affected_projects(json.load(f),
_get_testable_projects())
affected_components = get_affected_projects(json.load(f))
print(json.dumps(affected_components))
7 changes: 5 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ SIG team members will be assigned to review your pull requests. Once the pull re

Each project specifies its own Python dependencies depending on what folder it lives under:

- **examples/ projects**: Those need to provide a `requirements.txt` in the root of their folder. Example: `examples/xgboost_penguins/requirements.txt`. If you want your example to be executed as part of CI, you will also need to modify [ci_examples.yml](https://github.com/tensorflow/tfx-addons/blob/main/.github/workflows/ci_examples.yml#L31) and add the name of your `examples/{project_name}` to the `projects` array. You can depend on a `tfx_addons` project by using `../..[project_name]` in your `requirements.txt` file.
- **tfx_addons/ projects**: In order for project to be included in release and be tested, you will need to specify dependencies in [tfx_addons/version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py) `_PKG_METADATA` where key is the project name (aka tfx_addons/{project_name}) and value is a list of requirements strings needed for your component. Once added, this will automatically be picked up by CI and will automatically include your project into the tfx-addons release. In addition, your project will be added to the `tfx_addons.{project_name}` namespace, such that it can be used:
* **Projects in `examples/`**: Those need to provide a `requirements.txt` in the root of their folder. Example: `examples/xgboost_penguins/requirements.txt`. You can depend on a `tfx_addons` project by using `../..[project_name]` in your `requirements.txt` file.
* **Projects in `tfx_addons/`**: In order for a project to be included in the release and be tested, you will need to specify dependencies in [tfx_addons/version.py](https://github.com/tensorflow/tfx-addons/blob/main/tfx_addons/version.py) `_PKG_METADATA` where key is the project name (aka tfx_addons/{project_name}) and value is a list of requirements strings needed for your component. Once added, this will automatically be picked up by CI and will automatically include your project into the tfx-addons release. In addition, your project will be added to the `tfx_addons.{project_name}` namespace, such that it can be used:

```python

Expand All @@ -109,6 +109,7 @@ import tfx_addons as tfxa
tfxa.project_name
```

Note that CI runs on `pytest`, see _Testing your code_ below to check how to create tests for your code.

### Development tips

Expand Down Expand Up @@ -168,3 +169,5 @@ We use pytest to run tests. You can run tests locally using:
- Choose component to develop: `export COMPONENT_NAME=mlmd_client` (replace with the component you will be developing)
- Install test packages: `pip install -e ".[$COMPONENT_NAME,test]"`
- Run tests: `python -m pytest tfx_addons/$COMPONENT_NAME`

Note that only files that end with `_test.py` will be recognized as tests. Learn more on writing pytest tests in [pytest docs](https://docs.pytest.org/en/latest/getting-started.html#create-your-first-test).

0 comments on commit f18614c

Please sign in to comment.