Improve development workflow (#154)
* requirements

* add coverage package

* update .travis.yml to use makefile

* skip setup on every command

* reformat wip

* first pass at flake8 errors

* ignore invalid escape sequence in test

* fix F811 redefinition

* shellcheck

* run linters before tests

* update readme

* fix absolute path in requirements.txt

* urllib3 upper version bound

* update readme

* Add instructions for compiling requirements

* instructions to rerun setup after compiling requirements

* bump
jeevb authored Aug 13, 2020
1 parent 9355830 commit 83c10cc
Showing 257 changed files with 6,316 additions and 6,930 deletions.
5 changes: 2 additions & 3 deletions .travis.yml
@@ -2,10 +2,9 @@ language: python
python:
- "3.6"
install:
- pip install -r requirements.txt
- pip install -U .[all]
- pip install codecov
- make setup
script:
- make lint
- coverage run -m pytest tests/flytekit/unit
- shellcheck **/*.sh
after_success:
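The CI job now delegates to the Makefile recipes added in this commit. Roughly the same run can be reproduced locally as follows (a sketch based on the `install` and `script` sections above; the truncated `after_success` step is omitted):

```bash
# Approximate local equivalent of the updated CI steps
make setup                                   # pip-sync the pinned runtime and dev requirements
make lint                                    # flake8 over the repository
coverage run -m pytest tests/flytekit/unit   # unit tests with coverage
shellcheck **/*.sh                           # lint shell scripts
```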
43 changes: 43 additions & 0 deletions Makefile
@@ -0,0 +1,43 @@
define PIP_COMPILE
pip-compile $(1) --upgrade --verbose
endef

.SILENT: help
.PHONY: help
help:
echo Available recipes:
cat $(MAKEFILE_LIST) | grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' | awk 'BEGIN { FS = ":.*?## " } { cnt++; a[cnt] = $$1; b[cnt] = $$2; if (length($$1) > max) max = length($$1) } END { for (i = 1; i <= cnt; i++) printf " $(shell tput setaf 6)%-*s$(shell tput setaf 0) %s\n", max, a[i], b[i] }'
tput sgr0

.PHONY: _install-piptools
_install-piptools:
pip install -U pip-tools

.PHONY: setup
setup: _install-piptools ## Install requirements
pip-sync requirements.txt dev-requirements.txt

.PHONY: fmt
fmt: ## Format code with black and isort
black .
isort .

.PHONY: lint
lint: ## Run linters
flake8 .

.PHONY: test
test: lint ## Run tests
pytest tests/flytekit/unit
shellcheck **/*.sh

requirements.txt: export CUSTOM_COMPILE_COMMAND := make requirements.txt
requirements.txt: requirements.in _install-piptools
$(call PIP_COMPILE,requirements.in)

dev-requirements.txt: export CUSTOM_COMPILE_COMMAND := make dev-requirements.txt
dev-requirements.txt: dev-requirements.in requirements.txt _install-piptools
$(call PIP_COMPILE,dev-requirements.in)

.PHONY: requirements
requirements: requirements.txt dev-requirements.txt ## Compile requirements
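The `setup` and `requirements` recipes are thin wrappers around pip-tools; derived from the `PIP_COMPILE` macro and recipe bodies above, they expand to roughly these commands (a sketch, not an exact transcript):

```bash
# make requirements: re-pin the lock files from the .in inputs
pip install -U pip-tools
pip-compile requirements.in --upgrade --verbose       # regenerates requirements.txt
pip-compile dev-requirements.in --upgrade --verbose   # regenerates dev-requirements.txt

# make setup: align the active virtualenv with exactly the pinned versions
pip-sync requirements.txt dev-requirements.txt
```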
48 changes: 39 additions & 9 deletions README.md
@@ -81,23 +81,53 @@ Or install them with the `all` directive. `all` defaults to Spark 2.4.x currently
pip install "flytekit[all]"
```

## Testing
## Development

Flytekit is Python 2.7+ compatible, so when feasible, it is recommended to test with both Python 2 and 3.
### Recipes

### Unit Testing
```
$ make
Available recipes:
setup Install requirements
fmt Format code with black and isort
lint Run linters
test Run tests
requirements Compile requirements
```

### Setup (Do Once)

#### Setup (Do Once)
```bash
virtualenv ~/.virtualenvs/flytekit
source ~/.virtualenvs/flytekit/bin/activate
python -m pip install -r requirements.txt
python -m pip install -U ".[all]"
make setup
```

### Formatting

We use [black](https://github.com/psf/black) and [isort](https://github.com/timothycrosley/isort) to autoformat code. Run the following command to execute the formatters:

```bash
source ~/.virtualenvs/flytekit/bin/activate
make fmt
```
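The lint step also flags formatting drift through the `flake8-black` and `flake8-isort` plugins pinned in `dev-requirements.txt`. If you want a check-only pass without rewriting files (for example in a local pre-commit hook), both formatters support it; a sketch (not a recipe added by this commit):

```bash
# Verify formatting without modifying files
black --check .
isort --check-only .
```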

#### Execute
### Testing

#### Unit Testing

```bash
source ~/.virtualenvs/flytekit/bin/activate
python -m pytest tests/flytekit/unit
shellcheck **/*.sh
make test
```
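`make test` runs the linters first, then the unit tests and shellcheck. While iterating, pytest can also be invoked directly on a subset of the suite; a sketch (the `-k` expression is only an illustrative placeholder):

```bash
source ~/.virtualenvs/flytekit/bin/activate
# Run a filtered subset of the unit tests
python -m pytest tests/flytekit/unit -k "<pattern>"
```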

### Updating requirements

Update requirements in [`setup.py`](setup.py), or update requirements for development in [`dev-requirements.in`](dev-requirements.in). Then, validate, pin and freeze all requirements by running:

```bash
source ~/.virtualenvs/flytekit/bin/activate
make requirements
```

This will re-create the [`requirements.txt`](requirements.txt) and [`dev-requirements.txt`](dev-requirements.txt) files, which will be used for testing. You will also have to re-run `make setup` to update your local environment with the updated requirements.
10 changes: 10 additions & 0 deletions dev-requirements.in
@@ -0,0 +1,10 @@
-c requirements.txt

black
coverage
flake8
flake8-black
flake8-isort
isort
mock
pytest
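The `-c requirements.txt` line is a pip constraints reference: when the dev requirements are compiled, any package that also appears in `requirements.txt` is pinned to the same version, so the development environment cannot drift from the runtime pins. A sketch of the effect (roughly what the `dev-requirements.txt` recipe runs):

```bash
pip-compile dev-requirements.in --upgrade --verbose
# Shared packages in the generated dev-requirements.txt carry the annotation
# "via -c requirements.txt" (see black, click, six, etc. below).
```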
34 changes: 34 additions & 0 deletions dev-requirements.txt
@@ -0,0 +1,34 @@
#
# This file is autogenerated by pip-compile
# To update, run:
#
# make dev-requirements.txt
#
appdirs==1.4.4 # via -c requirements.txt, black
attrs==19.3.0 # via -c requirements.txt, black, pytest
black==19.10b0 # via -c requirements.txt, -r dev-requirements.in, flake8-black
click==7.1.2 # via -c requirements.txt, black
coverage==5.2.1 # via -r dev-requirements.in
flake8-black==0.2.1 # via -r dev-requirements.in
flake8-isort==4.0.0 # via -r dev-requirements.in
flake8==3.8.3 # via -r dev-requirements.in, flake8-black, flake8-isort
importlib-metadata==1.7.0 # via -c requirements.txt, flake8, pluggy, pytest
iniconfig==1.0.1 # via pytest
isort==5.3.2 # via -r dev-requirements.in, flake8-isort
mccabe==0.6.1 # via flake8
mock==4.0.2 # via -r dev-requirements.in
more-itertools==8.4.0 # via pytest
packaging==20.4 # via pytest
pathspec==0.8.0 # via -c requirements.txt, black
pluggy==0.13.1 # via pytest
py==1.9.0 # via pytest
pycodestyle==2.6.0 # via flake8
pyflakes==2.2.0 # via flake8
pyparsing==2.4.7 # via packaging
pytest==6.0.1 # via -r dev-requirements.in
regex==2020.7.14 # via -c requirements.txt, black
six==1.15.0 # via -c requirements.txt, packaging
testfixtures==6.14.1 # via flake8-isort
toml==0.10.1 # via -c requirements.txt, black, pytest
typed-ast==1.4.1 # via -c requirements.txt, black
zipp==3.1.0 # via -c requirements.txt, importlib-metadata
4 changes: 2 additions & 2 deletions flytekit/__init__.py
@@ -1,5 +1,5 @@
from __future__ import absolute_import

import flytekit.plugins
import flytekit.plugins # noqa: F401

__version__ = '0.11.7'
__version__ = "0.12.0b0"
57 changes: 28 additions & 29 deletions flytekit/bin/entrypoint.py
@@ -1,19 +1,21 @@
from __future__ import absolute_import

import datetime as _datetime
import importlib as _importlib
import os as _os
import random as _random

import click as _click
import datetime as _datetime
import random as _random
from flyteidl.core import literals_pb2 as _literals_pb2

from flytekit.common import utils as _utils
from flytekit.common.exceptions import scopes as _scopes, system as _system_exceptions
from flytekit.configuration import internal as _internal_config, TemporaryConfiguration as _TemporaryConfiguration
from flytekit.common.exceptions import scopes as _scopes
from flytekit.common.exceptions import system as _system_exceptions
from flytekit.configuration import TemporaryConfiguration as _TemporaryConfiguration
from flytekit.configuration import internal as _internal_config
from flytekit.engines import loader as _engine_loader
from flytekit.interfaces.data import data_proxy as _data_proxy
from flytekit.interfaces import random as _flyte_random
from flytekit.interfaces.data import data_proxy as _data_proxy
from flytekit.models import literals as _literal_models


@@ -26,14 +28,14 @@ def _compute_array_job_index():
:rtype: int
"""
offset = 0
if _os.environ.get('BATCH_JOB_ARRAY_INDEX_OFFSET'):
offset = int(_os.environ.get('BATCH_JOB_ARRAY_INDEX_OFFSET'))
return offset + int(_os.environ.get(_os.environ.get('BATCH_JOB_ARRAY_INDEX_VAR_NAME')))
if _os.environ.get("BATCH_JOB_ARRAY_INDEX_OFFSET"):
offset = int(_os.environ.get("BATCH_JOB_ARRAY_INDEX_OFFSET"))
return offset + int(_os.environ.get(_os.environ.get("BATCH_JOB_ARRAY_INDEX_VAR_NAME")))


def _map_job_index_to_child_index(local_input_dir, datadir, index):
local_lookup_file = local_input_dir.get_named_tempfile('indexlookup.pb')
idx_lookup_file = _os.path.join(datadir, 'indexlookup.pb')
local_lookup_file = local_input_dir.get_named_tempfile("indexlookup.pb")
idx_lookup_file = _os.path.join(datadir, "indexlookup.pb")

# if the indexlookup.pb does not exist, then just return the index
if not _data_proxy.Data.data_exists(idx_lookup_file):
@@ -44,47 +46,44 @@ def _map_job_index_to_child_index(local_input_dir, datadir, index):
if len(mapping_proto.literals) < index:
raise _system_exceptions.FlyteSystemAssertion(
"dynamic task index lookup array size: {} is smaller than lookup index {}".format(
len(mapping_proto.literals), index))
len(mapping_proto.literals), index
)
)
return mapping_proto.literals[index].scalar.primitive.integer


@_scopes.system_entry_point
def _execute_task(task_module, task_name, inputs, output_prefix, test):
with _TemporaryConfiguration(_internal_config.CONFIGURATION_PATH.get()):
with _utils.AutoDeletingTempDir('input_dir') as input_dir:
with _utils.AutoDeletingTempDir("input_dir") as input_dir:
# Load user code
task_module = _importlib.import_module(task_module)
task_def = getattr(task_module, task_name)

if not test:
local_inputs_file = input_dir.get_named_tempfile('inputs.pb')
local_inputs_file = input_dir.get_named_tempfile("inputs.pb")

# Handle inputs/outputs for array job.
if _os.environ.get('BATCH_JOB_ARRAY_INDEX_VAR_NAME'):
if _os.environ.get("BATCH_JOB_ARRAY_INDEX_VAR_NAME"):
job_index = _compute_array_job_index()

# TODO: Perhaps remove. This is a workaround to an issue we perceived with limited entropy in
# TODO: AWS batch array jobs.
_flyte_random.seed_flyte_random(
"{} {} {}".format(
_random.random(),
_datetime.datetime.utcnow(),
job_index
)
"{} {} {}".format(_random.random(), _datetime.datetime.utcnow(), job_index)
)

# If an ArrayTask is discoverable, the original job index may be different than the one specified in
# the environment variable. Look up the correct input/outputs in the index lookup mapping file.
job_index = _map_job_index_to_child_index(input_dir, inputs, job_index)

inputs = _os.path.join(inputs, str(job_index), 'inputs.pb')
inputs = _os.path.join(inputs, str(job_index), "inputs.pb")
output_prefix = _os.path.join(output_prefix, str(job_index))

_data_proxy.Data.get_data(inputs, local_inputs_file)
input_proto = _utils.load_proto_from_file(_literals_pb2.LiteralMap, local_inputs_file)
_engine_loader.get_engine().get_task(task_def).execute(
_literal_models.LiteralMap.from_flyte_idl(input_proto),
context={'output_prefix': output_prefix}
_literal_models.LiteralMap.from_flyte_idl(input_proto), context={"output_prefix": output_prefix},
)


@@ -93,16 +92,16 @@ def _pass_through():
pass


@_pass_through.command('pyflyte-execute')
@_click.option('--task-module', required=True)
@_click.option('--task-name', required=True)
@_click.option('--inputs', required=True)
@_click.option('--output-prefix', required=True)
@_click.option('--test', is_flag=True)
@_pass_through.command("pyflyte-execute")
@_click.option("--task-module", required=True)
@_click.option("--task-name", required=True)
@_click.option("--inputs", required=True)
@_click.option("--output-prefix", required=True)
@_click.option("--test", is_flag=True)
def execute_task_cmd(task_module, task_name, inputs, output_prefix, test):
_click.echo(_utils.get_version_message())
_execute_task(task_module, task_name, inputs, output_prefix, test)


if __name__ == '__main__':
if __name__ == "__main__":
_pass_through()