Skip to content

Commit

Permalink
Merge branch 'main' into tgoelles/main
Browse files Browse the repository at this point in the history
# Conflicts:
#	kedro-datasets/RELEASE.md
#	kedro-datasets/setup.py
  • Loading branch information
merelcht committed Jan 4, 2024
2 parents d97b476 + b359ca6 commit 08b31be
Show file tree
Hide file tree
Showing 175 changed files with 3,037 additions and 3,089 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/kedro-airflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest, windows-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/unit-tests.yml
with:
plugin: kedro-airflow
Expand All @@ -40,7 +40,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/e2e-tests.yml
with:
plugin: kedro-airflow
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/kedro-datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest, windows-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/unit-tests.yml
with:
plugin: kedro-datasets
Expand All @@ -41,21 +41,21 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.8"
python-version: "3.9"
- name: Cache python packages
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: kedro-datasets-ubuntu-latest-python-"3.8"
key: kedro-datasets-ubuntu-latest-python-"3.9"
restore-keys: kedro-datasets
- name: Install dependencies
run: |
python -m pip install -U "pip>=21.2,<23.2" # Temporary fix
cd kedro-datasets
pip install ".[docs]"
pip install ".[test]"
pip install ".[docs,test]"
- name: RTD build for kedro-datasets
run: |
make rtd
4 changes: 2 additions & 2 deletions .github/workflows/kedro-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest, windows-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/unit-tests.yml
with:
plugin: kedro-docker
Expand All @@ -40,7 +40,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/e2e-tests.yml
with:
plugin: kedro-docker
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/kedro-telemetry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/unit-tests.yml
with:
plugin: kedro-telemetry
Expand All @@ -40,7 +40,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
uses: ./.github/workflows/e2e-tests.yml
with:
plugin: kedro-telemetry
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
- name: Install dependencies
run: |
cd ${{ inputs.plugin }}
python -m pip install -U "pip>=21.2,<23.2" # Temporary fix
pip install git+https://github.com/kedro-org/kedro@main
pip install ".[test]"
pip freeze
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ jobs:
- name: Install dependencies
run: |
cd ${{ inputs.plugin }}
python -m pip install -U "pip>=21.2,<23.2" # Temporary fix
pip install ".[test]"
- name: pip freeze
run: pip freeze
Expand Down
31 changes: 26 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.SUFFIXES:

package:
cd $(plugin);\
rm -Rf dist;\
Expand All @@ -20,10 +22,28 @@ test:
cd $(plugin) && pytest tests --cov-config pyproject.toml --numprocesses 4 --dist loadfile

# Run test_tensorflow_model_dataset separately, because these tests are flaky when run as part of the full test-suite
dataset-tests:
cd kedro-datasets && pytest tests --cov-config pyproject.toml --numprocesses 4 --dist loadfile --ignore tests/tensorflow
dataset-tests: dataset-doctests
cd kedro-datasets && pytest tests --cov-config pyproject.toml --numprocesses 4 --dist loadfile --ignore tests/databricks --ignore tests/tensorflow
cd kedro-datasets && pytest tests/tensorflow/test_tensorflow_model_dataset.py --no-cov

# Extra pytest arguments for the doctest rule below. The rule picks one of
# these by appending the target stem to the prefix `extra_pytest_arg`:
#   dataset-doctests-no-spark -> $(extra_pytest_args-no-spark)
#   dataset-doctests          -> $(extra_pytest_args), which is empty
extra_pytest_args-no-spark=--ignore kedro_datasets/databricks --ignore kedro_datasets/spark
extra_pytest_args=
# Pattern rule: `${*}` is the text that follows `dataset-doctest` in the
# requested target name. Only the stems `s` and `s-no-spark` are accepted; any
# other stem prints a make-style "No rule to make target" message and exits
# with status 2. The pytest command then runs in doctest mode over the
# `kedro_datasets` package, ignoring the listed modules and appending the
# stem-selected extra arguments.
dataset-doctest%:
if [ "${*}" != 's-no-spark' ] && [ "${*}" != 's' ]; then \
echo "make: *** No rule to make target \`${@}\`. Stop."; \
exit 2; \
fi; \
\
# The ignored datasets below require complicated setup with cloud/database clients which is overkill for the doctest examples.
cd kedro-datasets && pytest kedro_datasets --doctest-modules --doctest-continue-on-failure --no-cov \
--ignore kedro_datasets/pandas/gbq_dataset.py \
--ignore kedro_datasets/partitions/partitioned_dataset.py \
--ignore kedro_datasets/redis/redis_dataset.py \
--ignore kedro_datasets/snowflake/snowpark_dataset.py \
--ignore kedro_datasets/spark/spark_hive_dataset.py \
--ignore kedro_datasets/spark/spark_jdbc_dataset.py \
$(extra_pytest_arg${*})

test-sequential:
cd $(plugin) && pytest tests --cov-config pyproject.toml

Expand Down Expand Up @@ -56,15 +76,16 @@ sign-off:
chmod +x .git/hooks/commit-msg

# kedro-datasets related only
test-no-spark:
test-no-spark: dataset-doctests-no-spark
cd kedro-datasets && pytest tests --no-cov --ignore tests/spark --ignore tests/databricks --numprocesses 4 --dist loadfile

test-no-spark-sequential:
test-no-spark-sequential: dataset-doctests-no-spark
cd kedro-datasets && pytest tests --no-cov --ignore tests/spark --ignore tests/databricks

# kedro-datasets/snowflake tests skipped from default scope
test-snowflake-only:
cd kedro-datasets && pytest tests --no-cov --numprocesses 1 --dist loadfile -m snowflake
cd kedro-datasets && pytest --no-cov --numprocesses 1 --dist loadfile -m snowflake
cd kedro-datasets && pytest kedro_datasets/snowflake --doctest-modules --doctest-continue-on-failure --no-cov

rtd:
cd kedro-datasets && python -m sphinx -WETan -j auto -D language=en -b linkcheck -d _build/doctrees docs/source _build/linkcheck
24 changes: 18 additions & 6 deletions kedro-airflow/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Kedro-Airflow

[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Python Version](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://pypi.org/project/kedro-airflow/)
[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro-airflow/)
[![PyPI Version](https://badge.fury.io/py/kedro-airflow.svg)](https://pypi.org/project/kedro-airflow/)
[![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black)

Expand Down Expand Up @@ -42,7 +42,7 @@ The Airflow DAG configuration can be customized by editing this file.
### Step 3: Package and install the Kedro pipeline in the Airflow executor's environment

After generating and deploying the DAG file, you will then need to package and install the Kedro pipeline into the Airflow executor's environment.
Please visit the guide to [deploy Kedro as a Python package](https://kedro.readthedocs.io/en/stable/10_deployment/02_single_machine.html#package-based) for more details.
Please visit the guide to [deploy Kedro as a Python package](https://docs.kedro.org/en/stable/deployment/single_machine.html#package-based) for more details.

### FAQ

Expand Down Expand Up @@ -100,9 +100,7 @@ In order to configure the config loader, update the `settings.py` file in your K
For instance, if you would like to use the name `scheduler`, then change the file as follows:

```python
CONFIG_LOADER_ARGS = {
"config_patterns": {"airflow": ["scheduler*", "scheduler/**"]}
}
CONFIG_LOADER_ARGS = {"config_patterns": {"airflow": ["scheduler*", "scheduler/**"]}}
```

Follow Kedro's [official documentation](https://docs.kedro.org/en/stable/configuration/advanced_configuration.html#how-to-do-templating-with-the-omegaconfigloader) to see how to add templating, custom resolvers, etc.
Expand Down Expand Up @@ -130,10 +128,13 @@ In order to configure the `OmegaConfigLoader`, update the `settings.py` file in

```python
from kedro.config import OmegaConfigLoader
CONFIG_LOADER_CLASS = OmegaConfigLoader
CONFIG_LOADER_ARGS = {
# other args
"config_patterns": {"airflow": ["airflow*", "airflow/**"]} # configure the pattern for configuration files
"config_patterns": { # configure the pattern for configuration files
"airflow": ["airflow*", "airflow/**"]
}
}
```

Expand All @@ -152,3 +153,14 @@ You can set the operator to use by providing a custom template.
See ["What if I want to use a different Jinja2 template?"](#what-if-i-want-to-use-a-different-jinja2-template) for instructions on using custom templates.
The [rich offering](https://airflow.apache.org/docs/apache-airflow-providers/operators-and-hooks-ref/index.html) of operators means that the `kedro-airflow` plugin is providing templates for specific operators.
The default template provided by `kedro-airflow` uses the `BaseOperator`.

## Can I contribute?

Yes! Want to help build Kedro-Airflow? Check out our guide to [contributing](https://github.com/kedro-org/kedro-plugins/blob/main/kedro-airflow/CONTRIBUTING.md).

## What licence do you use?

Kedro-Airflow is licensed under the [Apache 2.0](https://github.com/kedro-org/kedro-plugins/blob/main/LICENSE.md) License.

## Python version support policy
* [Kedro-Airflow](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-airflow) supports all Python versions that are actively maintained by the CPython core team. When a [Python version reaches end of life](https://devguide.python.org/versions/#versions), support for that version is dropped from `kedro-airflow`. This is not considered a breaking change.
6 changes: 6 additions & 0 deletions kedro-airflow/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Upcoming Release

# Release 0.8.0
* Added support for Kedro 0.19.x

# Release 0.7.0
* Added support for Python 3.11
* Added the `--all` CLI argument to `kedro-airflow` to convert all registered pipelines at once.
* Simplified the output of the `kedro airflow create` command.
* Fixed compatibility of `kedro-airflow` with older versions of the config loaders (`kedro<=0.18.2`).
* Removed support for Python 3.7

## Community contributions
Many thanks to the following Kedroids for contributing PRs to this release:
Expand Down
12 changes: 5 additions & 7 deletions kedro-airflow/features/airflow.feature
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,19 @@ Feature: Airflow
Given I have installed kedro version "latest"
And I have prepared a config file
And I have run a non-interactive kedro new
And I have prepared a data catalog
And I have executed the kedro command "airflow create -t ../airflow/dags/"
When I execute the airflow command "tasks list project-dummy"
Then I should get a successful exit code
And I should get a message including "split"
And I should get a message including "make-predictions"
And I should get a message including "report-accuracy"
And I should get a message including "create-model-input-table-node"
And I should get a message including "preprocess-companies-node"
And I should get a message including "preprocess-shuttles-node"

Scenario: Run Airflow task locally with latest Kedro
Given I have installed kedro version "latest"
And I have prepared a config file
And I have run a non-interactive kedro new
And I have prepared a data catalog
And I have executed the kedro command "airflow create -t ../airflow/dags/"
And I have installed the kedro project package
When I execute the airflow command "tasks test project-dummy split 2016-06-01T00:00:00+00:00"
When I execute the airflow command "tasks test project-dummy preprocess-companies-node"
Then I should get a successful exit code
And I should get a message including "Loading data from 'parameters'"
And I should get a message including "Loading data from companies"
70 changes: 2 additions & 68 deletions kedro-airflow/features/steps/cli_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,6 @@ def init_airflow(context, home_dir):
assert res.returncode == 0


# Step definition registered via `@given` for "I have prepared an old data catalog".
# Builds six catalog entries, all of type "PickleLocalDataset" with .pkl files
# under data/02_intermediate/, and dumps them as YAML to
# <context.root_project_dir>/conf/local/catalog.yml.
@given("I have prepared an old data catalog")
def prepare_old_catalog(context):
config = {
"example_train_x": {
"type": "PickleLocalDataset",
"filepath": "data/02_intermediate/example_train_x.pkl",
},
"example_train_y": {
"type": "PickleLocalDataset",
"filepath": "data/02_intermediate/example_train_y.pkl",
},
"example_test_x": {
"type": "PickleLocalDataset",
"filepath": "data/02_intermediate/example_test_x.pkl",
},
"example_test_y": {
"type": "PickleLocalDataset",
"filepath": "data/02_intermediate/example_test_y.pkl",
},
"example_model": {
"type": "PickleLocalDataset",
"filepath": "data/02_intermediate/example_model.pkl",
},
"example_predictions": {
"type": "PickleLocalDataset",
"filepath": "data/02_intermediate/example_predictions.pkl",
},
}
catalog_file = context.root_project_dir / "conf" / "local" / "catalog.yml"
# NOTE: `catalog_file` is rebound here from the path to the open file handle.
with catalog_file.open("w") as catalog_file:
yaml.dump(config, catalog_file, default_flow_style=False)


# Step definition registered via `@given` for "I have prepared a data catalog".
# Same six entries as the "old" catalog step, but with type
# "pickle.PickleDataset"; the result is dumped as YAML to
# <context.root_project_dir>/conf/local/catalog.yml.
@given("I have prepared a data catalog")
def prepare_catalog(context):
config = {
"example_train_x": {
"type": "pickle.PickleDataset",
"filepath": "data/02_intermediate/example_train_x.pkl",
},
"example_train_y": {
"type": "pickle.PickleDataset",
"filepath": "data/02_intermediate/example_train_y.pkl",
},
"example_test_x": {
"type": "pickle.PickleDataset",
"filepath": "data/02_intermediate/example_test_x.pkl",
},
"example_test_y": {
"type": "pickle.PickleDataset",
"filepath": "data/02_intermediate/example_test_y.pkl",
},
"example_model": {
"type": "pickle.PickleDataset",
"filepath": "data/02_intermediate/example_model.pkl",
},
"example_predictions": {
"type": "pickle.PickleDataset",
"filepath": "data/02_intermediate/example_predictions.pkl",
},
}
catalog_file = context.root_project_dir / "conf" / "local" / "catalog.yml"
# NOTE: `catalog_file` is rebound here from the path to the open file handle.
with catalog_file.open("w") as catalog_file:
yaml.dump(config, catalog_file, default_flow_style=False)


@given('I have installed kedro version "{version}"')
def install_kedro(context, version):
"""Execute Kedro command and check the status."""
Expand All @@ -100,7 +34,7 @@ def install_kedro(context, version):
@given("I have installed the kedro project package")
def install_project_package(context):
"""Install the packaged project."""
cmd = [context.pip, "install", "-e", "src/"]
cmd = [context.pip, "install", "-e", "."]
res = run(cmd, env=context.env, cwd=str(context.root_project_dir))

if res.returncode != OK_EXIT_CODE:
Expand Down Expand Up @@ -159,7 +93,7 @@ def create_project_from_config_file(context):
"-c",
str(context.config_file),
"--starter",
"pandas-iris",
"spaceflights-pandas",
],
env=context.env,
cwd=str(context.temp_dir),
Expand Down
2 changes: 1 addition & 1 deletion kedro-airflow/kedro_airflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Kedro plugin for running a project with Airflow."""

__version__ = "0.6.0"
__version__ = "0.8.0"
3 changes: 1 addition & 2 deletions kedro-airflow/kedro_airflow/airflow_dag_template.j2
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ class KedroOperator(BaseOperator):

def execute(self, context):
configure_project(self.package_name)
with KedroSession.create(self.package_name,
self.project_path,
with KedroSession.create(project_path=self.project_path,
env=self.env) as session:
session.run(self.pipeline_name, node_names=[self.node_name])

Expand Down
2 changes: 1 addition & 1 deletion kedro-airflow/kedro_airflow/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _load_config(context: KedroContext) -> dict[str, Any]:
# Backwards compatibility for ConfigLoader that does not support `config_patterns`
config_loader = context.config_loader
if not hasattr(config_loader, "config_patterns"):
return config_loader.get("airflow*", "airflow/**")
return config_loader.get("airflow*", "airflow/**") # pragma: no cover

# Set the default pattern for `airflow` if not provided in `settings.py`
if "airflow" not in config_loader.config_patterns.keys():
Expand Down
Loading

0 comments on commit 08b31be

Please sign in to comment.