From a2363f72bc94ab2f453830fcc5a0ca1cf7ee464e Mon Sep 17 00:00:00 2001 From: SmritiSatyanV <94349093+SmritiSatyanV@users.noreply.github.com> Date: Wed, 4 May 2022 23:23:31 +0530 Subject: [PATCH] Resolving pre-commit-hook changes (#741) * Resolving pre-commit-hook changes * Resolving pre-commit-hook changes * Resolving pre-commit-hook changes * updated makefile Signed-off-by: SmritiSatyanV * Updated makefile Added spellcheck Signed-off-by: SmritiSatyanV * spellcheck Signed-off-by: SmritiSatyanV --- .pre-commit-config.yaml | 24 + Makefile | 14 +- .../bioinformatics/blast/blastx_example.py | 2 - .../feature_engineering/eda/notebook.py | 8 +- .../eda/notebook_and_task.py | 10 +- .../eda/notebooks_as_tasks.py | 14 +- .../feast_integration/Feast_Flyte_Demo.ipynb | 2 +- .../feast_integration/feast_workflow.py | 6 +- .../feast_integration/feature_eng_tasks.py | 4 +- .../house_price_predictor.py | 39 +- .../multiregion_house_price_predictor.py | 25 +- .../pytorch_single_node_and_gpu.py | 28 +- .../pytorch_single_node_multi_gpu.py | 94 ++- .../ml_training/pima_diabetes/diabetes.py | 11 +- .../keras_spark_rossmann_estimator.py | 47 +- cookbook/common/leaf.mk | 2 +- .../python/calculate-ellipse-area.py | 17 +- .../core/containerization/raw_container.py | 5 +- .../core/containerization/spot_instances.py | 2 +- cookbook/core/containerization/use_secrets.py | 69 +- cookbook/core/control_flow/chain_tasks.py | 7 +- cookbook/core/control_flow/checkpoint.py | 19 +- cookbook/core/control_flow/conditions.py | 74 +- cookbook/core/control_flow/dynamics.py | 11 +- cookbook/core/control_flow/merge_sort.py | 10 +- cookbook/core/control_flow/subworkflows.py | 51 +- cookbook/core/extend_flyte/backend_plugins.py | 4 +- .../core/extend_flyte/custom_task_plugin.py | 7 +- cookbook/core/extend_flyte/custom_types.py | 13 +- cookbook/core/flyte_basics/basic_workflow.py | 4 +- .../core/flyte_basics/decorating_tasks.py | 11 +- .../core/flyte_basics/decorating_workflows.py | 11 +- .../core/flyte_basics/documented_workflow.py | 3 +- cookbook/core/flyte_basics/files.py | 9 +- cookbook/core/flyte_basics/folders.py | 15 +- cookbook/core/flyte_basics/lp.py | 4 +- cookbook/core/flyte_basics/named_outputs.py | 2 +- cookbook/core/flyte_basics/shell_task.py | 5 +- cookbook/core/flyte_basics/task_cache.py | 30 +- .../core/flyte_basics/task_cache_serialize.py | 9 +- cookbook/core/type_system/enums.py | 5 +- cookbook/core/type_system/flyte_pickle.py | 2 +- .../core/type_system/structured_dataset.py | 17 +- cookbook/core/type_system/typed_schema.py | 4 +- cookbook/deployment/configure_use_gpus.py | 2 +- cookbook/deployment/deploying_workflows.py | 4 +- cookbook/deployment/lp_notifications.py | 15 +- cookbook/dev-requirements.in | 7 +- cookbook/dev-requirements.txt | 794 +++--------------- cookbook/docs/README.md | 2 +- cookbook/docs/conf.py | 9 +- cookbook/integrations/aws/batch/batch.py | 3 +- .../sagemaker_pytorch_distributed_training.py | 4 +- .../sagemaker_builtin_algo_training.py | 8 +- .../sagemaker_custom_training.py | 17 +- .../external_services/snowflake/snowflake.py | 21 +- .../dolt/dolt_branch_example.py | 33 +- .../dolt/dolt_quickstart_example.py | 11 +- .../greatexpectations/type_example.py | 39 +- .../modin_examples/knn_classifier.py | 4 +- .../pandera_examples/basic_schema_example.py | 40 +- .../validating_and_testing_ml_pipelines.py | 15 +- .../flytekit_plugins/sql/sql_alchemy.py | 10 +- .../kubernetes/k8s_spark/README.rst | 2 +- .../kubernetes/k8s_spark/pyspark_pi.py | 2 +- .../kubernetes/kfmpi/mpi_mnist.py | 71 +- 
.../kubernetes/kfpytorch/pytorch_mnist.py | 4 +- .../kubernetes/kftensorflow/tf_mnist.py | 15 +- cookbook/integrations/kubernetes/pod/pod.py | 10 +- cookbook/larger_apps/larger_apps_iterate.py | 2 +- 70 files changed, 790 insertions(+), 1104 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..316f1b0ee2 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +repos: +- repo: https://github.com/PyCQA/flake8 + rev: 3.9.2 + hooks: + - id: flake8 +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black +- repo: https://github.com/PyCQA/isort + rev: 5.9.3 + hooks: + - id: isort + args: ["--profile", "black"] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.7.2.1 + hooks: + - id: shellcheck diff --git a/Makefile b/Makefile index 90aaab115f..523e3fe22f 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,19 @@ update_boilerplate: @curl https://raw.githubusercontent.com/flyteorg/boilerplate/master/boilerplate/update.sh -o boilerplate/update.sh @boilerplate/update.sh - +.PHONY: fmt +fmt: ## Format code with black and isort + pre-commit run black --all-files || true + pre-commit run isort --all-files || true + +.PHONY: lint +lint: ## Run linters + pre-commit run --all-files + +.PHONY: spellcheck +spellcheck: ## Runs a spellchecker over all code and documentation + codespell -L "te,raison,fo" --skip="./docs/build,./.git" + .PHONY: help help: ## Show help message @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[$$()% a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) diff --git a/cookbook/case_studies/bioinformatics/blast/blastx_example.py b/cookbook/case_studies/bioinformatics/blast/blastx_example.py index 325cf99eb8..a4b1cda5d2 100644 --- a/cookbook/case_studies/bioinformatics/blast/blastx_example.py +++ b/cookbook/case_studies/bioinformatics/blast/blastx_example.py @@ -11,12 +11,10 @@ import matplotlib.pyplot as plt import pandas as pd - from flytekit import conditional, kwtypes, task, workflow from flytekit.extras.tasks.shell import OutputLocation, ShellTask from flytekit.types.file import FlyteFile, PNGImageFile - # %% # A ``ShellTask`` is useful to run commands on the shell. # In this example, we use ``ShellTask`` to generate and run the BLASTX command. diff --git a/cookbook/case_studies/feature_engineering/eda/notebook.py b/cookbook/case_studies/feature_engineering/eda/notebook.py index 7891587347..b5b5e8ce03 100644 --- a/cookbook/case_studies/feature_engineering/eda/notebook.py +++ b/cookbook/case_studies/feature_engineering/eda/notebook.py @@ -2,8 +2,8 @@ Flyte Pipeline in One Jupyter Notebook ======================================= -In this example, we will implement a simple pipeline that takes hyperparameters, does EDA, feature engineering, and measures the Gradient -Boosting model's performace using mean absolute error (MAE), all in one notebook. +In this example, we will implement a simple pipeline that takes hyperparameters, does EDA, feature engineering, and measures the Gradient +Boosting model's performance using mean absolute error (MAE), all in one notebook. 
""" # %% @@ -15,7 +15,7 @@ from flytekitplugins.papermill import NotebookTask # %% -# We define a ``NotebookTask`` to run the `Jupyter notebook +# We define a ``NotebookTask`` to run the `Jupyter notebook # `__. # # .. list-table:: ``NotebookTask`` Parameters @@ -49,6 +49,8 @@ # %% # Since a task need not be defined, we create a ``workflow`` and return the MAE score. + + @workflow def notebook_wf( n_estimators: int = 150, diff --git a/cookbook/case_studies/feature_engineering/eda/notebook_and_task.py b/cookbook/case_studies/feature_engineering/eda/notebook_and_task.py index bed8ff8a59..163f948533 100644 --- a/cookbook/case_studies/feature_engineering/eda/notebook_and_task.py +++ b/cookbook/case_studies/feature_engineering/eda/notebook_and_task.py @@ -2,9 +2,9 @@ EDA and Feature Engineering in Jupyter Notebook and Modeling in a Flyte Task ============================================================================ -In this example, we will implement a simple pipeline that takes hyperparameters, does EDA, feature engineering -(step 1: EDA and feature engineering in notebook), and measures the Gradient Boosting model's performace using mean absolute error (MAE) -(step 2: Modeling in a Flyte Task). +In this example, we will implement a simple pipeline that takes hyperparameters, does EDA, feature engineering +(step 1: EDA and feature engineering in notebook), and measures the Gradient Boosting model's performance using mean absolute error (MAE) +(step 2: Modeling in a Flyte Task). """ # %% @@ -37,7 +37,7 @@ class Hyperparameters(object): # %% -# We define a ``NotebookTask`` to run the `Jupyter notebook +# We define a ``NotebookTask`` to run the `Jupyter notebook # `__. # This notebook returns ``dummified_data`` and ``dataset`` as the outputs. # @@ -55,6 +55,8 @@ class Hyperparameters(object): # %% # Next, we define a ``cross_validate`` function and a ``modeling`` task to compute the MAE score of the data against # the Gradient Boosting Regressor. + + def cross_validate(model, nfolds, feats, targets): score = -1 * ( cross_val_score( diff --git a/cookbook/case_studies/feature_engineering/eda/notebooks_as_tasks.py b/cookbook/case_studies/feature_engineering/eda/notebooks_as_tasks.py index d09b975133..7d07308269 100644 --- a/cookbook/case_studies/feature_engineering/eda/notebooks_as_tasks.py +++ b/cookbook/case_studies/feature_engineering/eda/notebooks_as_tasks.py @@ -2,9 +2,9 @@ EDA and Feature Engineering in One Jupyter Notebook and Modeling in the Other ============================================================================= -In this example, we will implement a simple pipeline that takes hyperparameters, does EDA, feature engineering -(step 1: EDA and feature engineering in notebook), and measures the Gradient Boosting model's performace using mean absolute error -(MAE) (step 2: Modeling in notebook). +In this example, we will implement a simple pipeline that takes hyperparameters, does EDA, feature engineering +(step 1: EDA and feature engineering in notebook), and measures the Gradient Boosting model's performance using mean absolute error +(MAE) (step 2: Modeling in notebook). """ # %% @@ -17,7 +17,7 @@ from flytekitplugins.papermill import NotebookTask # %% -# We define a ``NotebookTask`` to run the `Jupyter notebook +# We define a ``NotebookTask`` to run the `Jupyter notebook # `__ (EDA). # This notebook returns ``dummified_data`` and ``dataset`` as the outputs. 
# @@ -35,8 +35,8 @@ ) # %% -# We define a ``NotebookTask`` to run the `Jupyter notebook -# `__ +# We define a ``NotebookTask`` to run the `Jupyter notebook +# `__ # (Modeling). # # This notebook returns ``mae_score`` as the output. @@ -60,6 +60,8 @@ # %% # We define a ``Workflow`` to run the notebook tasks. + + @workflow def notebook_wf( n_estimators: int = 150, diff --git a/cookbook/case_studies/feature_engineering/feast_integration/Feast_Flyte_Demo.ipynb b/cookbook/case_studies/feature_engineering/feast_integration/Feast_Flyte_Demo.ipynb index f4724a9fc3..263e50e8bc 100644 --- a/cookbook/case_studies/feature_engineering/feast_integration/Feast_Flyte_Demo.ipynb +++ b/cookbook/case_studies/feature_engineering/feast_integration/Feast_Flyte_Demo.ipynb @@ -71,7 +71,7 @@ "source": [ "#### Retrieve the latest registered version of the pipeline\n", "\n", - "FlyteRemote provides convienient methods to retrieve version of the pipeline from the remote server.\n", + "FlyteRemote provides convenient methods to retrieve version of the pipeline from the remote server.\n", "\n", "**NOTE** It is possible to get a specific version of the workflow and trigger a launch for that, but let's just get the latest." ] diff --git a/cookbook/case_studies/feature_engineering/feast_integration/feast_workflow.py b/cookbook/case_studies/feature_engineering/feast_integration/feast_workflow.py index dc36661ab8..2b21161b0d 100644 --- a/cookbook/case_studies/feature_engineering/feast_integration/feast_workflow.py +++ b/cookbook/case_studies/feature_engineering/feast_integration/feast_workflow.py @@ -18,18 +18,19 @@ """ import logging +import random import typing # %% # Let's import the libraries. from datetime import datetime, timedelta -import random import boto3 import joblib import pandas as pd from feast import Entity, Feature, FeatureStore, FeatureView, FileSource, ValueType -from flytekit import task, workflow, TaskMetadata, Resources +from feast_dataobjects import FeatureStore, FeatureStoreConfig # noqa : F811 +from flytekit import Resources, TaskMetadata, task, workflow from flytekit.configuration.internal import AWS from flytekit.extras.sqlite3.task import SQLite3Config, SQLite3Task from flytekit.types.file import JoblibSerializedFile @@ -37,7 +38,6 @@ from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB -from feast_dataobjects import FeatureStore, FeatureStoreConfig from .feature_eng_tasks import mean_median_imputer, univariate_selection logger = logging.getLogger(__file__) diff --git a/cookbook/case_studies/feature_engineering/feast_integration/feature_eng_tasks.py b/cookbook/case_studies/feature_engineering/feast_integration/feature_eng_tasks.py index eb4ef7cd32..faf1a66726 100644 --- a/cookbook/case_studies/feature_engineering/feast_integration/feature_eng_tasks.py +++ b/cookbook/case_studies/feature_engineering/feast_integration/feature_eng_tasks.py @@ -27,7 +27,7 @@ # %% -# We define a ``mean_median_imputer`` task to fill in the missing values of the dataset, for which we use the +# We define a ``mean_median_imputer`` task to fill in the missing values of the dataset, for which we use the # `SimpleImputer `__ class from the ``scikit-learn`` library. @task(cache=True, cache_version="1.0") def mean_median_imputer( @@ -53,7 +53,7 @@ def mean_median_imputer( # %% # Let's define the other task called ``univariate_selection`` that does feature selection. 
-# The `SelectKBest `__ method removes all +# The `SelectKBest `__ method removes all # but the highest scoring features (DataFrame columns). @task(cache=True, cache_version="1.0") def univariate_selection( diff --git a/cookbook/case_studies/ml_training/house_price_prediction/house_price_predictor.py b/cookbook/case_studies/ml_training/house_price_prediction/house_price_predictor.py index a8cc38b43a..8b5874dea7 100644 --- a/cookbook/case_studies/ml_training/house_price_prediction/house_price_predictor.py +++ b/cookbook/case_studies/ml_training/house_price_prediction/house_price_predictor.py @@ -4,12 +4,12 @@ Predicting House Price in a Region Using XGBoost ------------------------------------------------ -`XGBoost `__ is an optimized distributed gradient boosting library designed to be efficient, flexible, and portable. +`XGBoost `__ is an optimized distributed gradient boosting library designed to be efficient, flexible, and portable. It uses `gradient boosting `__ technique to implement Machine Learning algorithms. In this tutorial, we will understand how to predict house prices using XGBoost, and Flyte. -We will split the generated dataset into train, test and validation set. +We will split the generated dataset into train, test and validation set. Next, we will create three Flyte tasks, that will: @@ -17,6 +17,7 @@ 2. Train the model using XGBoost. 3. Generate predictions. + Let's get started with the example! """ @@ -30,20 +31,21 @@ # pip install joblib # pip install xgboost +import os + # %% # First, let's import the required packages into the environment. import typing +from typing import Tuple -import os import flytekit import joblib import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split -from xgboost import XGBRegressor from flytekit import Resources, task, workflow from flytekit.types.file import JoblibSerializedFile -from typing import Tuple +from sklearn.model_selection import train_test_split +from xgboost import XGBRegressor # %% # We initialize a variable to represent columns in the dataset. The other variables help generate the dataset. @@ -66,6 +68,8 @@ # ===================== # # We define a function to compute the price of a house based on multiple factors (``number of bedrooms``, ``number of bathrooms``, ``area``, ``garage space``, and ``year built``). 
+ + def gen_price(house) -> int: _base_price = int(house["SQUARE_FEET"] * 150) _price = int( @@ -93,7 +97,7 @@ def gen_houses(num_houses) -> pd.DataFrame: "YEAR_BUILT": min(MAX_YEAR, int(np.random.normal(1995, 10))), } _price = gen_price(_house) - # column names/features + # column names/features _house_list.append( [ _price, @@ -105,13 +109,14 @@ def gen_houses(num_houses) -> pd.DataFrame: _house["GARAGE_SPACES"], ] ) - # convert the list to a DataFrame + # convert the list to a DataFrame _df = pd.DataFrame( _house_list, columns=COLUMNS, ) return _df + # %% # Data Preprocessing and Splitting # =================================== @@ -122,16 +127,16 @@ def split_data( ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: seed = seed - val_size = split[1] # 0.3 - test_size = split[2] # 0.1 + val_size = split[1] # 0.3 + test_size = split[2] # 0.1 num_samples = df.shape[0] # retain the features, skip the target column - x1 = df.values[:num_samples, 1:] + x1 = df.values[:num_samples, 1:] # retain the target column - y1 = df.values[:num_samples, :1] + y1 = df.values[:num_samples, :1] - # divide the features and target column into random train and test subsets, based on `test_size` + # divide the features and target column into random train and test subsets, based on `test_size` x_train, x_test, y_train, y_test = train_test_split( x1, y1, test_size=test_size, random_state=seed ) @@ -139,7 +144,7 @@ def split_data( x_train, x_val, y_train, y_val = train_test_split( x_train, y_train, - test_size=(val_size / (1 - test_size)), # here, `test_size` computes to 0.3 + test_size=(val_size / (1 - test_size)), # here, `test_size` computes to 0.3 random_state=seed, ) @@ -164,6 +169,7 @@ def split_data( ), ) + # %% # Next, we create a ``NamedTuple`` to map a variable name to its respective data type. dataset = typing.NamedTuple( @@ -175,11 +181,14 @@ def split_data( # %% # We define a task to call the aforementioned functions. + + @task(cache=True, cache_version="0.1", limits=Resources(mem="600Mi")) def generate_and_split_data(number_of_houses: int, seed: int) -> dataset: _houses = gen_houses(number_of_houses) return split_data(_houses, seed, split=SPLIT_RATIOS) + # %% # Training # ========== @@ -261,4 +270,4 @@ def house_price_predictor_trainer( # # We can run the workflow locally provided the required libraries are installed. The output would be a list of house prices, generated using the XGBoost model. if __name__ == "__main__": - print(house_price_predictor_trainer()) \ No newline at end of file + print(house_price_predictor_trainer()) diff --git a/cookbook/case_studies/ml_training/house_price_prediction/multiregion_house_price_predictor.py b/cookbook/case_studies/ml_training/house_price_prediction/multiregion_house_price_predictor.py index e1fe25097f..1c2eb9e71a 100644 --- a/cookbook/case_studies/ml_training/house_price_prediction/multiregion_house_price_predictor.py +++ b/cookbook/case_studies/ml_training/house_price_prediction/multiregion_house_price_predictor.py @@ -5,7 +5,7 @@ In this tutorial, we will understand how to predict house prices in multiple regions using XGBoost, and :ref:`dynamic workflows ` in Flyte. -We will split the generated dataset into train, test and validation set. +We will split the generated dataset into train, test and validation set. Next, we will create two dynamic workflows in Flyte, that will: @@ -25,17 +25,9 @@ # %% # We define a ``try-catch`` block to import data preprocessing functions from :ref:`here `. 
try: - from .house_price_predictor import ( - generate_and_split_data, - fit, - predict, - ) + from .house_price_predictor import fit, generate_and_split_data, predict except ImportError: - from house_price_predictor import ( - generate_and_split_data, - fit, - predict, - ) + from house_price_predictor import fit, generate_and_split_data, predict # %% # We initialize a variable to represent columns in the dataset. The other variables help generate the dataset. @@ -64,7 +56,7 @@ ] # %% -# Data Generation and Preprocessing +# Data Generation and Preprocessing # ==================================== # We call the :ref:`data generation ` and :ref:`data preprocessing ` functions to generate train, test, and validation data. # First, let's create a ``NamedTuple`` that maps variable names to their respective data types. @@ -77,13 +69,15 @@ # %% # Next, we create a :py:func:`~flytekit:flytekit.dynamic` workflow to generate and split the data for multiple regions. + + @dynamic(cache=True, cache_version="0.1", limits=Resources(mem="600Mi")) def generate_and_split_data_multiloc( locations: typing.List[str], number_of_houses_per_location: int, seed: int, ) -> dataset: - train_sets = [] # create empty lists for train, validation, and test subsets + train_sets = [] # create empty lists for train, validation, and test subsets val_sets = [] test_sets = [] for _ in locations: @@ -99,7 +93,7 @@ def generate_and_split_data_multiloc( test_sets.append( _test, ) - # split the dataset into train, validation, and test subsets + # split the dataset into train, validation, and test subsets return train_sets, val_sets, test_sets @@ -116,7 +110,7 @@ def parallel_fit_predict( multi_test: typing.List[pd.DataFrame], ) -> typing.List[typing.List[float]]: preds = [] - + # generate predictions for multiple regions for loc, train, val, test in zip(LOCATIONS, multi_train, multi_val, multi_test): model = fit(loc=loc, train=train, val=val) @@ -149,6 +143,7 @@ def multi_region_house_price_prediction_model_trainer( return predictions + # %% # Running the Model Locally # ========================== diff --git a/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_and_gpu.py b/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_and_gpu.py index a9ae93cf9b..3ebcc659bc 100644 --- a/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_and_gpu.py +++ b/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_and_gpu.py @@ -10,7 +10,7 @@ provisioned to have GPU machines, Flyte will execute the task on a node that has GPU(s). Currently, Flyte does not provide any specific task type for PyTorch (though it is entirely possible to provide a task-type -that supports *PyTorch-Ignite* or *PyTorch Lightening* support, but this is not critical). One can request for a GPU, simply +that supports *PyTorch-Ignite* or *PyTorch Lightning* support, but this is not critical). One can request for a GPU, simply by setting GPU="1" resource request and then at runtime, the GPU will be provisioned. In this example, we'll see how we can create any PyTorch model, train it using Flyte and a specialized container. The following video will outline the basics of this process. @@ -52,7 +52,10 @@ # We'll call this function in the ``pytorch_mnist_task`` defined below. 
def wandb_setup(): wandb.login() - wandb.init(project="mnist-single-node-single-gpu", entity=os.environ.get("WANDB_USERNAME", "my-user-name")) + wandb.init( + project="mnist-single-node-single-gpu", + entity=os.environ.get("WANDB_USERNAME", "my-user-name"), + ) # %% @@ -85,6 +88,7 @@ def forward(self, x): # The Data Loader # =============== + def mnist_dataloader(batch_size, train=True, **kwargs): return torch.utils.data.DataLoader( datasets.MNIST( @@ -188,9 +192,15 @@ def test(model, device, test_loader): for images, targets in test_loader: images, targets = images.to(device), targets.to(device) # device conversion outputs = model(images) # forward pass -- generate predictions - test_loss += F.nll_loss(outputs, targets, reduction="sum").item() # sum up batch loss - _, predicted = torch.max(outputs.data, 1) # get the index of the max log-probability - correct += (predicted == targets).sum().item() # compare predictions to true label + test_loss += F.nll_loss( + outputs, targets, reduction="sum" + ).item() # sum up batch loss + _, predicted = torch.max( + outputs.data, 1 + ) # get the index of the max log-probability + correct += ( + (predicted == targets).sum().item() + ) # compare predictions to true label # log predictions to the ``wandb`` table if log_counter < NUM_BATCHES_TO_LOG: @@ -206,7 +216,9 @@ def test(model, device, test_loader): accuracy = float(correct) / len(test_loader.dataset) # log the average loss, accuracy, and table - wandb.log({"test_loss": test_loss, "accuracy": accuracy, "mnist_predictions": my_table}) + wandb.log( + {"test_loss": test_loss, "accuracy": accuracy, "mnist_predictions": my_table} + ) return accuracy @@ -343,7 +355,9 @@ def pytorch_training_wf( # It is possible to run the model locally with almost no modifications (as long as the code takes care of resolving # if the code is distributed or not). This is how we can do it: if __name__ == "__main__": - model, accuracies = pytorch_training_wf(hp=Hyperparameters(epochs=10, batch_size=128)) + model, accuracies = pytorch_training_wf( + hp=Hyperparameters(epochs=10, batch_size=128) + ) print(f"Model: {model}, Accuracies: {accuracies}") # %% diff --git a/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py b/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py index dba6d33bd7..f05d92ad22 100644 --- a/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py +++ b/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py @@ -33,13 +33,15 @@ import wandb from flytekit import Resources, task, workflow from flytekit.types.file import PythonPickledFile + # %% # We'll re-use certain classes and functions from the # :ref:`single node and gpu tutorial ` # such as the ``Net`` model architecture, ``Hyperparameters``, and ``log_test_predictions``. -from mnist_classifier.pytorch_single_node_and_gpu import Net, Hyperparameters, log_test_predictions +from mnist_classifier.pytorch_single_node_and_gpu import Hyperparameters, Net, log_test_predictions from torch import distributed as dist -from torch import nn, multiprocessing as mp, optim +from torch import multiprocessing as mp +from torch import nn, optim from torchvision import datasets, transforms # %% @@ -67,7 +69,10 @@ # We'll call this function in the ``pytorch_mnist_task`` defined below. 
def wandb_setup(): wandb.login() - wandb.init(project="mnist-single-node-multi-gpu", entity=os.environ.get("WANDB_USERNAME", "my-user-name")) + wandb.init( + project="mnist-single-node-multi-gpu", + entity=os.environ.get("WANDB_USERNAME", "my-user-name"), + ) # %% @@ -95,17 +100,32 @@ def download_mnist(data_dir): # # This function will be called in the training function to be distributed across all available GPUs. Note that # we set ``download=False`` here to avoid race conditions as mentioned above. -def mnist_dataloader(data_dir, batch_size, train=True, distributed=False, rank=None, world_size=None, **kwargs): +def mnist_dataloader( + data_dir, + batch_size, + train=True, + distributed=False, + rank=None, + world_size=None, + **kwargs, +): dataset = datasets.MNIST( data_dir, train=train, download=False, - transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307), (0.3081))]), + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307), (0.3081))] + ), ) if distributed: - assert rank is not None, "rank needs to be specified when doing distributed training." + assert ( + rank is not None + ), "rank needs to be specified when doing distributed training." sampler = torch.utils.data.distributed.DistributedSampler( - dataset, rank=rank, num_replicas=1 if world_size is None else world_size, shuffle=True + dataset, + rank=rank, + num_replicas=1 if world_size is None else world_size, + shuffle=True, ) else: sampler = None @@ -178,19 +198,27 @@ def test(model, rank, test_loader): with torch.no_grad(): # loop through the test data loader - total = 0. + total = 0.0 for images, targets in test_loader: total += len(targets) images, targets = images.to(rank), targets.to(rank) # device conversion outputs = model(images) # forward pass -- generate predictions - test_loss += F.nll_loss(outputs, targets, reduction="sum").item() # sum up batch loss - _, predicted = torch.max(outputs.data, 1) # get the index of the max log-probability - correct += (predicted == targets).sum().item() # compare predictions to true label + test_loss += F.nll_loss( + outputs, targets, reduction="sum" + ).item() # sum up batch loss + _, predicted = torch.max( + outputs.data, 1 + ) # get the index of the max log-probability + correct += ( + (predicted == targets).sum().item() + ) # compare predictions to true label # log predictions to the ``wandb`` table if log_counter < NUM_BATCHES_TO_LOG: if rank == 0: - log_test_predictions(images, targets, outputs, predicted, my_table, log_counter) + log_test_predictions( + images, targets, outputs, predicted, my_table, log_counter + ) log_counter += 1 # compute the average loss @@ -200,7 +228,13 @@ def test(model, rank, test_loader): if rank == 0: print("\ntest_loss={:.4f}\naccuracy={:.4f}\n".format(test_loss, accuracy)) # log the average loss, accuracy, and table - wandb.log({"test_loss": test_loss, "accuracy": accuracy, "mnist_predictions": my_table}) + wandb.log( + { + "test_loss": test_loss, + "accuracy": accuracy, + "mnist_predictions": my_table, + } + ) return accuracy @@ -223,6 +257,7 @@ def test(model, rank, test_loader): # ``dist_setup`` is a helper function that instantiates a distributed environment. We're pointing all of the # processes across all available GPUs to the address of the main process. 
+ def dist_setup(rank, world_size, backend): os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "8888" @@ -244,6 +279,7 @@ def dist_setup(rank, world_size, backend): # - save the trained model to disk # - keep track of validation metrics + def train_mnist(rank: int, world_size: int, hp: Hyperparameters): # store the hyperparameters' config in ``wandb`` if rank == 0: @@ -264,16 +300,26 @@ def train_mnist(rank: int, world_size: int, hp: Hyperparameters): kwargs = {"num_workers": 0, "pin_memory": True} if use_cuda else {} print("Getting data loaders") training_data_loader = mnist_dataloader( - DATA_DIR, hp.batch_size, train=True, distributed=use_cuda, rank=rank, world_size=world_size, **kwargs + DATA_DIR, + hp.batch_size, + train=True, + distributed=use_cuda, + rank=rank, + world_size=world_size, + **kwargs, + ) + test_data_loader = mnist_dataloader( + DATA_DIR, hp.test_batch_size, train=False, **kwargs ) - test_data_loader = mnist_dataloader(DATA_DIR, hp.test_batch_size, train=False, **kwargs) # define the distributed model and optimizer print("Defining model") model = Net().cuda(rank) model = nn.parallel.DistributedDataParallel(model, device_ids=[rank]) - optimizer = optim.SGD(model.parameters(), lr=hp.learning_rate, momentum=hp.sgd_momentum) + optimizer = optim.SGD( + model.parameters(), lr=hp.learning_rate, momentum=hp.sgd_momentum + ) # train the model: run multiple epochs and capture the accuracies for each epoch print(f"Training for {hp.epochs} epochs") @@ -349,8 +395,12 @@ def train_mnist(rank: int, world_size: int, hp: Hyperparameters): retries=2, cache=True, cache_version="1.2", - requests=Resources(gpu=gpu, mem=mem, storage=storage, ephemeral_storage=ephemeral_storage), - limits=Resources(gpu=gpu, mem=mem, storage=storage, ephemeral_storage=ephemeral_storage), + requests=Resources( + gpu=gpu, mem=mem, storage=storage, ephemeral_storage=ephemeral_storage + ), + limits=Resources( + gpu=gpu, mem=mem, storage=storage, ephemeral_storage=ephemeral_storage + ), ) def pytorch_mnist_task(hp: Hyperparameters) -> TrainingOutputs: print("Start MNIST training:") @@ -375,7 +425,9 @@ def pytorch_mnist_task(hp: Hyperparameters) -> TrainingOutputs: # %% # Finally, we define a workflow to run the training algorithm. We return the model and accuracies. @workflow -def pytorch_training_wf(hp: Hyperparameters = Hyperparameters(epochs=10, batch_size=128)) -> TrainingOutputs: +def pytorch_training_wf( + hp: Hyperparameters = Hyperparameters(epochs=10, batch_size=128) +) -> TrainingOutputs: return pytorch_mnist_task(hp=hp) @@ -386,7 +438,9 @@ def pytorch_training_wf(hp: Hyperparameters = Hyperparameters(epochs=10, batch_s # It is possible to run the model locally with almost no modifications (as long as the code takes care of resolving # if the code is distributed or not). 
This is how to do it: if __name__ == "__main__": - model, accuracies = pytorch_training_wf(hp=Hyperparameters(epochs=10, batch_size=128)) + model, accuracies = pytorch_training_wf( + hp=Hyperparameters(epochs=10, batch_size=128) + ) print(f"Model: {model}, Accuracies: {accuracies}") # %% diff --git a/cookbook/case_studies/ml_training/pima_diabetes/diabetes.py b/cookbook/case_studies/ml_training/pima_diabetes/diabetes.py index 714669a1ac..405775136c 100644 --- a/cookbook/case_studies/ml_training/pima_diabetes/diabetes.py +++ b/cookbook/case_studies/ml_training/pima_diabetes/diabetes.py @@ -10,6 +10,7 @@ import typing from collections import OrderedDict from dataclasses import dataclass +from typing import Tuple import joblib import pandas as pd @@ -20,7 +21,6 @@ from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split from xgboost import XGBClassifier -from typing import Tuple # %% # Since we are working with a specific dataset, we will create a strictly typed schema for the dataset. @@ -158,7 +158,8 @@ def fit( @task(cache_version="1.0", cache=True, limits=Resources(mem="200Mi")) def predict( - x: FlyteSchema[FEATURE_COLUMNS], model_ser: FlyteFile[MODELSER_JOBLIB], + x: FlyteSchema[FEATURE_COLUMNS], + model_ser: FlyteFile[MODELSER_JOBLIB], ) -> FlyteSchema[CLASSES_COLUMNS]: """ Given a any trained model, serialized using joblib (this method can be shared!) and features, this method returns @@ -207,7 +208,11 @@ def diabetes_xgboost_model( x_train, x_test, y_train, y_test = split_traintest_dataset( dataset=dataset, seed=seed, test_split_ratio=test_split_ratio ) - model = fit(x=x_train, y=y_train, hyperparams=XGBoostModelHyperparams(max_depth=4),) + model = fit( + x=x_train, + y=y_train, + hyperparams=XGBoostModelHyperparams(max_depth=4), + ) predictions = predict(x=x_test, model_ser=model.model) return model.model, score(predictions=predictions, y=y_test) diff --git a/cookbook/case_studies/ml_training/spark_horovod/keras_spark_rossmann_estimator.py b/cookbook/case_studies/ml_training/spark_horovod/keras_spark_rossmann_estimator.py index 82223db53f..1c0422c05c 100644 --- a/cookbook/case_studies/ml_training/spark_horovod/keras_spark_rossmann_estimator.py +++ b/cookbook/case_studies/ml_training/spark_horovod/keras_spark_rossmann_estimator.py @@ -4,8 +4,8 @@ Data-Parallel Distributed Training Using Horovod on Spark --------------------------------------------------------- -When time- and compute-intensive deep learning workloads need to be trained efficiently, data-parallel distributed training comes to the rescue. -This technique parallelizes the data and requires sharing of weights between different worker nodes involved in the distributed training after every epoch, which ensures that all worker nodes train a consistent model. +When time- and compute-intensive deep learning workloads need to be trained efficiently, data-parallel distributed training comes to the rescue. +This technique parallelizes the data and requires sharing of weights between different worker nodes involved in the distributed training after every epoch, which ensures that all worker nodes train a consistent model. Overall, data-parallel distributed training can help speed up the execution time. In this tutorial, we will understand how data-parallel distributed training works with Flyte, Horovod, and Spark. @@ -14,7 +14,7 @@ Lastly, we will build a Keras model and perform distributed training using Horovod's `KerasEstimator API `__. 
Before executing the code, create `work_dir`, an s3 bucket. - + Let's get started with the example! """ @@ -44,16 +44,7 @@ from horovod.spark.common.store import Store from horovod.tensorflow.keras.callbacks import BestModelCheckpoint from pyspark import Row -from tensorflow.keras.layers import ( - BatchNormalization, - Concatenate, - Dense, - Dropout, - Embedding, - Flatten, - Input, - Reshape, -) +from tensorflow.keras.layers import BatchNormalization, Concatenate, Dense, Dropout, Embedding, Flatten, Input, Reshape # %% # We define two variables to represent categorical and continuous columns in the dataset. @@ -101,6 +92,8 @@ # %% # Next, let's initialize a data class to store the hyperparameters that will be used with the model (``epochs``, ``learning_rate``, ``batch_size``, etc.). + + @dataclass_json @dataclass class Hyperparameters: @@ -112,6 +105,7 @@ class Hyperparameters: local_checkpoint_file: str = "checkpoint.h5" local_submission_csv: str = "submission.csv" + # %% # Downloading the Data # ==================== @@ -155,11 +149,12 @@ def download_data(dataset: str) -> FlyteDirectory: # return the directory populated with Rossmann data files return FlyteDirectory(path=str(data_dir)) + # %% # Data Preprocessing # ===================== # -# 1. Let's start with cleaning and preparing the Google trend data. We create new 'Date' and 'State' columns using PySpark's ``withColumn``. These columns, in addition to other features, will contribute to the prediction of sales. +# 1. Let's start with cleaning and preparing the Google trend data. We create new 'Date' and 'State' columns using PySpark's ``withColumn``. These columns, in addition to other features, will contribute to the prediction of sales. def prepare_google_trend( google_trend_csv: pyspark.sql.DataFrame, ) -> pyspark.sql.DataFrame: @@ -180,6 +175,7 @@ def prepare_google_trend( # expand dates return expand_date(google_trend_all) + # %% # 2. Next, we set a few date-specific values in the DataFrame to analyze the seasonal effects on sales. def expand_date(df: pyspark.sql.DataFrame) -> pyspark.sql.DataFrame: @@ -191,6 +187,7 @@ def expand_date(df: pyspark.sql.DataFrame) -> pyspark.sql.DataFrame: .withColumn("Day", F.dayofmonth(df.Date)) ) + # %% # 3. We retrieve the number of days before/after a special event (such as a promo or holiday). This data helps analyze how the sales may vary before/after a special event. def add_elapsed(df: pyspark.sql.DataFrame, cols: List[str]) -> pyspark.sql.DataFrame: @@ -221,6 +218,7 @@ def fn(rows): df = rdd.toDF() return df + # %% # 4. We define a function to merge several Spark DataFrames into a single DataFrame to create training and test data. def prepare_df( @@ -233,7 +231,7 @@ def prepare_df( ) -> pyspark.sql.DataFrame: num_rows = df.count() - # expand dates + # expand dates df = expand_date(df) # create new columns in the DataFrame by filtering out special events(promo/holiday where sales was zero or store was closed). @@ -327,6 +325,7 @@ def prepare_df( assert num_rows == df.count(), "lost rows in joins" return df + # %% # 5. We build a dictionary of sorted, distinct categorical variables to create an embedding layer in our Keras model. def build_vocabulary(df: pyspark.sql.DataFrame) -> Dict[str, List[Any]]: @@ -338,6 +337,7 @@ def build_vocabulary(df: pyspark.sql.DataFrame) -> Dict[str, List[Any]]: vocab[col] = sorted(values, key=lambda x: x or default_value) return vocab + # %% # 6. Next, we cast continuous columns to float as part of data preprocessing. 
def cast_columns(df: pyspark.sql.DataFrame, cols: List[str]) -> pyspark.sql.DataFrame: @@ -345,6 +345,7 @@ def cast_columns(df: pyspark.sql.DataFrame, cols: List[str]) -> pyspark.sql.Data df = df.withColumn(col, F.coalesce(df[col].cast(T.FloatType()), F.lit(0.0))) return df + # %% # 7. Lastly, define a function that returns a list of values based on a key. def lookup_columns( @@ -360,6 +361,7 @@ def fn(v): df = df.withColumn(col, lookup(mapping)(df[col])) return df + # %% # The ``data_preparation`` function consolidates all the aforementioned data processing functions. def data_preparation( @@ -447,7 +449,7 @@ def data_preparation( .cache(), ) - # cast continuous columns to float + # cast continuous columns to float train_df = cast_columns(train_df, CONTINUOUS_COLS + ["Sales"]) # search for a key and return a list of values based on a key train_df = lookup_columns(train_df, vocab) @@ -492,6 +494,7 @@ def data_preparation( return max_sales, vocab, train_df, test_df + # %% # Training # =========== @@ -581,7 +584,7 @@ def act_sigmoid_scaled(x): # create an object of Store class store = Store.create(work_dir.remote_source) # 'SparkBackend' uses `horovod.spark.run` to execute the distributed training function, and - # returns a list of results by running 'train' on every worker in the cluster + # returns a list of results by running 'train' on every worker in the cluster backend = SparkBackend( num_proc=hp.num_proc, stdout=sys.stdout, @@ -613,7 +616,7 @@ def act_sigmoid_scaled(x): # the user would provide a Keras model to the `KerasEstimator`` # this `KerasEstimator`` will fit the data and store it in a Spark DataFrame keras_model = keras_estimator.fit(train_df).setOutputCols(["Sales_output"]) - # retrieve the model training history + # retrieve the model training history history = keras_model.getHistory() best_val_rmspe = min(history["val_exp_rmspe"]) print("Best RMSPE: %f" % best_val_rmspe) @@ -624,7 +627,8 @@ def act_sigmoid_scaled(x): "Written checkpoint to %s" % os.path.join(working_dir, hp.local_checkpoint_file) ) # the Estimator returns a Transformer representation of the trained model once training is complete - return keras_model + return keras_model + # %% # Evaluation @@ -659,6 +663,7 @@ def test( return working_dir + # %% # Defining the Spark Task # ======================== @@ -668,7 +673,7 @@ def test( # Within the task, let's call the data pre-processing, training, and evaluation functions. # # .. note:: -# +# # To set up Spark, refer to :ref:`flyte-and-spark`. # @task( @@ -710,6 +715,7 @@ def horovod_spark_task( # generate predictions return test(keras_model, working_dir, test_df, hp) + # %% # Lastly, we define a workflow to run the pipeline. @workflow @@ -723,6 +729,7 @@ def horovod_spark_wf( # work_dir corresponds to the Horovod-Spark store return horovod_spark_task(data_dir=data_dir, hp=hp, work_dir=work_dir) + # %% # Running the Model Locally # ========================== diff --git a/cookbook/common/leaf.mk b/cookbook/common/leaf.mk index da6cea8ea7..63a0be557f 100644 --- a/cookbook/common/leaf.mk +++ b/cookbook/common/leaf.mk @@ -15,7 +15,7 @@ ifdef SANDBOX # The url for Flyte Control plane export FLYTE_HOST ?= localhost:30081 -# Overrides s3 url. This is solely needed for SANDBOX deployments. Shouldn't be overriden in production AWS S3. +# Overrides s3 url. This is solely needed for SANDBOX deployments. Shouldn't be overridden in production AWS S3. export FLYTE_AWS_ENDPOINT ?= http://localhost:30084/ # Used to authenticate to s3. 
For a production AWS S3, it's discouraged to use keys and key ids. diff --git a/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py b/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py index a523ca1037..a932f7df60 100644 --- a/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py +++ b/cookbook/core/containerization/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py @@ -1,29 +1,32 @@ import math import sys + def read_input(input_dir, v): - with open(f'{input_dir}/{v}', 'r') as f: + with open(f"{input_dir}/{v}", "r") as f: return float(f.read()) + def write_output(output_dir, output_file, v): - with open(f'{output_dir}/{output_file}', 'w') as f: + with open(f"{output_dir}/{output_file}", "w") as f: f.write(str(v)) + def calculate_area(a, b): return math.pi * a * b def main(input_dir, output_dir): - a = read_input(input_dir, 'a') - b = read_input(input_dir, 'b') + a = read_input(input_dir, "a") + b = read_input(input_dir, "b") area = calculate_area(a, b) - write_output(output_dir, 'area', area) - write_output(output_dir, 'metadata', '[from python rawcontainer]') + write_output(output_dir, "area", area) + write_output(output_dir, "metadata", "[from python rawcontainer]") -if __name__ == '__main__': +if __name__ == "__main__": input_dir = sys.argv[1] output_dir = sys.argv[2] diff --git a/cookbook/core/containerization/raw_container.py b/cookbook/core/containerization/raw_container.py index ac8fdffa35..28be59d3c4 100644 --- a/cookbook/core/containerization/raw_container.py +++ b/cookbook/core/containerization/raw_container.py @@ -12,9 +12,8 @@ Refer to the raw protocol to understand how to leverage this. """ import logging -from typing import Tuple, Any, Mapping, List, Set -from flytekit import task, workflow -from flytekit import ContainerTask, kwtypes, workflow + +from flytekit import ContainerTask, kwtypes, task, workflow logger = logging.getLogger(__file__) diff --git a/cookbook/core/containerization/spot_instances.py b/cookbook/core/containerization/spot_instances.py index 2f8639b8b3..00575922ae 100644 --- a/cookbook/core/containerization/spot_instances.py +++ b/cookbook/core/containerization/spot_instances.py @@ -34,7 +34,7 @@ # What Are Interruptible Tasks? # ============================= # -# If specified, the ``interruptible flag`` is added to the task definition and signals to the Flyte engine that it may be scheduled on machines that may be preempted, such as AWS spot instances. This is low-hanging fruit for any cost-savings initiative. +# If specified, the ``interruptible flag`` is added to the task definition and signals to the Flyte engine that it may be scheduled on machines that may be preempted, such as AWS spot instances. This is low-hanging fruit for any cost-savings initiative. # # Setting Interruptible # ^^^^^^^^^^^^^^^^^^^^^ diff --git a/cookbook/core/containerization/use_secrets.py b/cookbook/core/containerization/use_secrets.py index 94abb295a6..77e98599c5 100644 --- a/cookbook/core/containerization/use_secrets.py +++ b/cookbook/core/containerization/use_secrets.py @@ -22,10 +22,10 @@ 2. Flyte will apply labels and annotations that are referenced to all secrets the task is requesting access to. 3. Flyte will send a POST request to ApiServer to create the object. 4. Before persisting the Pod, ApiServer will invoke all registered Pod Webhooks. Flyte's Pod Webhook will be called. 
-5. Flyte Pod Webhook will then, using the labels and annotiations attached in step 2, lookup globally mounted secrets for each of the requested secrets. +5. Flyte Pod Webhook will then, using the labels and annotiations attached in step 2, lookup globally mounted secrets for each of the requested secrets. 6. If found, Pod Webhook will mount them directly in the Pod. If not found, it will inject the appropriate annotations to load the secrets for K8s (or Vault or Confidant or any other secret management system plugin configured) into the task pod. -Once the secret is injected into the task pod, Flytekit can read it using the secret manager (see examples below). +Once the secret is injected into the task pod, Flytekit can read it using the secret manager (see examples below). The webhook is included in all overlays in the Flytekit repo. The deployment file creates (mainly) two things; a Job and a Deployment. @@ -35,41 +35,41 @@ Secret Discovery ---------------- -Flyte identifies secrets using a secret group and a secret key. +Flyte identifies secrets using a secret group and a secret key. In a task decorator you request a secret like this: ``@task(secret_requests=[Secret(group=SECRET_GROUP, key=SECRET_NAME)])`` Flytekit provides a shorthand for loading the requested secret inside a task: ``secret = flytekit.current_context().secrets.get(SECRET_GROUP, SECRET_NAME)`` -See the python examples further down for more details on how to request and use secrets in a task. +See the python examples further down for more details on how to request and use secrets in a task. Flytekit relies on the following environment variables to load secrets (defined `here `_). When running tasks and workflows locally you should make sure to store your secrets accordingly or to modify these: - FLYTE_SECRETS_DEFAULT_DIR - The directory Flytekit searches for secret files, default: "/etc/secrets" - FLYTE_SECRETS_FILE_PREFIX - a common file prefix for Flyte secrets, default: "" - FLYTE_SECRETS_ENV_PREFIX - a common env var prefix for Flyte secrets, default: "_FSEC_" -When running a workflow on a Flyte cluster, the configured secret manager will use the secret Group and Key to try and retrieve a secret. +When running a workflow on a Flyte cluster, the configured secret manager will use the secret Group and Key to try and retrieve a secret. If successful, it will make the secret available as either file or environment variable and will if necessary modify the above variables automatically so that the task can load and use the secrets. Configuring a secret management system plugin into use ------------------------------------------------------ -When a task requests a secret Flytepropeller will try to retrieve secrets in the following order: 1.) checking for global secrets (secrets mounted as files or environment variables on the flyte-pod-webhook pod) and 2.) checking with an additional configurable secret manager. -Note that the global secrets take precedence over any secret discoverable by the secret manager plugins. +When a task requests a secret Flytepropeller will try to retrieve secrets in the following order: 1.) checking for global secrets (secrets mounted as files or environment variables on the flyte-pod-webhook pod) and 2.) checking with an additional configurable secret manager. +Note that the global secrets take precedence over any secret discoverable by the secret manager plugins. 
-The following additional secret managers are available at the time of writing: +The following additional secret managers are available at the time of writing: - `K8s secrets `_ (default) - flyte-pod-webhook will try to look for a K8s secret named after the secret Group and retrieve the value for the secret Key. - AWS Secret Manager - flyte-pod-webhook will add the AWS Secret Manager sidecar container to a task Pod which will mount the secret. - `Vault Agent Injector `_ - flyte-pod-webhook will annotate the task Pod with the respective Vault annotations that trigger an existing Vault Agent Injector to retrieve the specified secret Key from a vault path defined as secret Group. -You can configure the additional secret manager by defining `secretManagerType` to be either 'K8s', 'AWS' or 'Vault' in +You can configure the additional secret manager by defining `secretManagerType` to be either 'K8s', 'AWS' or 'Vault' in the `core config ` of the Flytepropeller. When using the K8s secret manager plugin (enabled by default), the secrets need to be available in the same namespace as the task execution -(for example `flytesnacks-development`). K8s secrets can be mounted as either files or injected as environment variables into the task pod, -so if you need to make larger files available to the task, then this might be the better option. -Furthermore, this method also allows you to have separate credentials for different domains but still using the same name for the secret. -The `group` of the secret request corresponds to the K8s secret name, while the `name` of the request corresponds to the key of the specific entry in the secret. +(for example `flytesnacks-development`). K8s secrets can be mounted as either files or injected as environment variables into the task pod, +so if you need to make larger files available to the task, then this might be the better option. +Furthermore, this method also allows you to have separate credentials for different domains but still using the same name for the secret. +The `group` of the secret request corresponds to the K8s secret name, while the `name` of the request corresponds to the key of the specific entry in the secret. When using the Vault secret manager, make sure you have Vault Agent deployed on your cluster (`step-by-step tutorial `_). -Vault secrets can only be mounted as files and will become available under "/etc/flyte/secrets/SECRET_GROUP/SECRET_NAME". Vault comes with `two versions `_ of the key-value secret store. +Vault secrets can only be mounted as files and will become available under "/etc/flyte/secrets/SECRET_GROUP/SECRET_NAME". Vault comes with `two versions `_ of the key-value secret store. By default the Vault secret manager will try to retrieve Version 2 secrets. You can specify the KV version by setting webhook.vaultSecretManager.kvVersion in the configmap. Note that the version number needs to be an explicit string (e.g. "1"). You can also configure the Vault role under which Flyte will try to read the secret by setting webhook.vaultSecretManager.role (default: "flyte"). @@ -84,12 +84,14 @@ # %% import os -import flytekit from typing import Tuple +import flytekit + # %% # Flytekit exposes a type/class called Secrets. It can be imported as follows. from flytekit import Secret, task, workflow +from flytekit.testing import SecretsManager # %% # Secrets consists of a name and an enum that indicates how the secrets will be accessed. If the mounting_requirement is @@ -106,10 +108,10 @@ # %% -# Now declare the secret in the requests. 
The request tells Flyte to make the secret available to the task. The secret can -# then be accessed inside the task using the :py:class:`flytekit.ExecutionParameters`, through the global flytekit -# context as shown below. At runtime, flytekit looks inside the task pod for an environment variable or a mounted file with -# a predefined name/path and loads the value. +# Now declare the secret in the requests. The request tells Flyte to make the secret available to the task. The secret can +# then be accessed inside the task using the :py:class:`flytekit.ExecutionParameters`, through the global flytekit +# context as shown below. At runtime, flytekit looks inside the task pod for an environment variable or a mounted file with +# a predefined name/path and loads the value. @task(secret_requests=[Secret(group=SECRET_GROUP, key=SECRET_NAME)]) def secret_task() -> str: secret_val = flytekit.current_context().secrets.get(SECRET_GROUP, SECRET_NAME) @@ -138,9 +140,15 @@ def secret_task() -> str: # %% # The Secret structure allows passing two fields, matching the key and the group, as previously described: @task( - secret_requests=[Secret(key=USERNAME_SECRET, group=SECRET_GROUP), Secret(key=PASSWORD_SECRET, group=SECRET_GROUP)]) + secret_requests=[ + Secret(key=USERNAME_SECRET, group=SECRET_GROUP), + Secret(key=PASSWORD_SECRET, group=SECRET_GROUP), + ] +) def user_info_task() -> Tuple[str, str]: - secret_username = flytekit.current_context().secrets.get(SECRET_GROUP, USERNAME_SECRET) + secret_username = flytekit.current_context().secrets.get( + SECRET_GROUP, USERNAME_SECRET + ) secret_pwd = flytekit.current_context().secrets.get(SECRET_GROUP, PASSWORD_SECRET) # Please do not print the secret value, this is just a demonstration. print(f"{secret_username}={secret_pwd}") @@ -153,7 +161,15 @@ def user_info_task() -> Tuple[str, str]: # keys (certs etc). Another reason may be that a dependent library necessitates that the secret be available as a file. # In these scenarios you can specify the mount_requirement. In the following example we force the mounting to be # an Env variable -@task(secret_requests=[Secret(group=SECRET_GROUP, key=SECRET_NAME, mount_requirement=Secret.MountType.ENV_VAR)]) +@task( + secret_requests=[ + Secret( + group=SECRET_GROUP, + key=SECRET_NAME, + mount_requirement=Secret.MountType.ENV_VAR, + ) + ] +) def secret_file_task() -> Tuple[str, str]: # SM here is a handle to the secrets manager sm = flytekit.current_context().secrets @@ -176,13 +192,16 @@ def my_secret_workflow() -> Tuple[str, str, str, str, str]: # %% # The simplest way to test Secret accessibility is to export the secret as an environment variable. 
There are some # helper methods available to do so -from flytekit.testing import SecretsManager if __name__ == "__main__": sec = SecretsManager() os.environ[sec.get_secrets_env_var(SECRET_GROUP, SECRET_NAME)] = "value" - os.environ[sec.get_secrets_env_var(SECRET_GROUP, USERNAME_SECRET)] = "username_value" - os.environ[sec.get_secrets_env_var(SECRET_GROUP, PASSWORD_SECRET)] = "password_value" + os.environ[ + sec.get_secrets_env_var(SECRET_GROUP, USERNAME_SECRET) + ] = "username_value" + os.environ[ + sec.get_secrets_env_var(SECRET_GROUP, PASSWORD_SECRET) + ] = "password_value" x, y, z, f, s = my_secret_workflow() assert x == "value" assert y == "username_value" diff --git a/cookbook/core/control_flow/chain_tasks.py b/cookbook/core/control_flow/chain_tasks.py index 4e5adcfe8d..1f35973815 100644 --- a/cookbook/core/control_flow/chain_tasks.py +++ b/cookbook/core/control_flow/chain_tasks.py @@ -9,15 +9,18 @@ In this example, let's enforce an order for ``read()`` to happen after ``write()``. """ +import pandas as pd + # %% # First, we import the necessary dependencies. from flytekit import task, workflow from flytekit.core.node_creation import create_node -import pandas as pd DATABASE = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv" # %% # We define a ``read()`` task to read from the file. + + @task def read() -> pd.DataFrame: data = pd.read_csv(DATABASE) @@ -29,7 +32,7 @@ def read() -> pd.DataFrame: @task def write(): # dummy code - df = pd.DataFrame( + df = pd.DataFrame( # noqa : F841 data={ "sepal_length": [5.3], "sepal_width": [3.8], diff --git a/cookbook/core/control_flow/checkpoint.py b/cookbook/core/control_flow/checkpoint.py index dda62e06a0..7709ea9982 100644 --- a/cookbook/core/control_flow/checkpoint.py +++ b/cookbook/core/control_flow/checkpoint.py @@ -4,7 +4,7 @@ .. note:: - This feature is available from Flytekit version 0.30.0b6+ and needs a Flyte backend version of atleast 0.19.0+. + This feature is available from Flytekit version 0.30.0b6+ and needs a Flyte backend version of at least 0.19.0+. A checkpoint recovers a task from a previous failure by recording the state of a task before the failure and resuming from the latest recorded state. @@ -39,18 +39,17 @@ significant fault-tolerance to ensure successful completion. But as the time for a task increases, the cost of re-running it increases, and reduces the chances of successful -completion. This is where Flyte's intra-task checkpointing truly shines. +completion. This is where Flyte's intra-task checkpointing truly shines. Let's look at an example of how to develop tasks which utilize intra-task checkpointing. It only provides the low-level API, though. We intend to integrate higher-level checkpointing APIs available in popular training frameworks like Keras, Pytorch, Scikit-learn, and big-data frameworks like Spark and Flink to supercharge their fault-tolerance. """ -from flytekit import task, workflow, current_context +from flytekit import current_context, task, workflow from flytekit.exceptions.user import FlyteRecoverableException - -RETRIES=3 +RETRIES = 3 # %% @@ -75,7 +74,9 @@ def use_checkpoint(n_iterations: int) -> int: # simulate a deterministic failure, for demonstration. 
We want to show how it eventually completes within # the given retries if i > start and i % failure_interval == 0: - raise FlyteRecoverableException(f"Failed at iteration {start}, failure_interval {failure_interval}") + raise FlyteRecoverableException( + f"Failed at iteration {start}, failure_interval {failure_interval}" + ) # save progress state. It is also entirely possible save state every few intervals. cp.write(f"{i + 1}".encode()) @@ -83,7 +84,7 @@ def use_checkpoint(n_iterations: int) -> int: # %% -# The workflow here simply calls the task. The task itself +# The workflow here simply calls the task. The task itself # will be retried for the :ref:`FlyteRecoverableException `. # @workflow @@ -91,11 +92,11 @@ def example(n_iterations: int) -> int: return use_checkpoint(n_iterations=n_iterations) -#%% +# %% # The checkpoint is stored locally, but it is not used since retries are not supported. if __name__ == "__main__": try: example(n_iterations=10) - except RuntimeError as e: + except RuntimeError as e: # noqa : F841 # no retries are performed, so an exception is expected when run locally. pass diff --git a/cookbook/core/control_flow/conditions.py b/cookbook/core/control_flow/conditions.py index 944ffaee10..05d0507136 100644 --- a/cookbook/core/control_flow/conditions.py +++ b/cookbook/core/control_flow/conditions.py @@ -47,10 +47,10 @@ def double(n: float) -> float: def multiplier(my_input: float) -> float: return ( conditional("fractions") - .if_((my_input >= 0.1) & (my_input <= 1.0)) - .then(double(n=my_input)) - .else_() - .then(square(n=my_input)) + .if_((my_input >= 0.1) & (my_input <= 1.0)) + .then(double(n=my_input)) + .else_() + .then(square(n=my_input)) ) @@ -73,12 +73,12 @@ def multiplier(my_input: float) -> float: def multiplier_2(my_input: float) -> float: return ( conditional("fractions") - .if_((my_input > 0.1) & (my_input < 1.0)) - .then(double(n=my_input)) - .elif_((my_input > 1.0) & (my_input <= 10.0)) - .then(square(n=my_input)) - .else_() - .fail("The input must be between 0 and 10") + .if_((my_input > 0.1) & (my_input < 1.0)) + .then(double(n=my_input)) + .elif_((my_input > 1.0) & (my_input <= 10.0)) + .then(square(n=my_input)) + .else_() + .fail("The input must be between 0 and 10") ) @@ -94,12 +94,12 @@ def multiplier_2(my_input: float) -> float: def multiplier_3(my_input: float) -> float: result = ( conditional("fractions") - .if_((my_input > 0.1) & (my_input < 1.0)) - .then(double(n=my_input)) - .elif_((my_input > 1.0) & (my_input < 10.0)) - .then(square(n=my_input)) - .else_() - .fail("The input must be between 0 and 10") + .if_((my_input > 0.1) & (my_input < 1.0)) + .then(double(n=my_input)) + .elif_((my_input > 1.0) & (my_input < 10.0)) + .then(square(n=my_input)) + .else_() + .fail("The input must be between 0 and 10") ) # the 'result' will either be the output of `double` or `square`. If none of the conditions is true, @@ -119,7 +119,7 @@ def multiplier_3(my_input: float) -> float: # # .. note:: # -# How do output values get these methods? +# How do output values get these methods? # In a workflow, no output can be accessed directly. The inputs and outputs are auto-wrapped in a special object called :py:class:`flytekit.extend.Promise`. # # In this example, we create a biased coin whose seed can be controlled. 
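A rough sketch of that pattern (the task and workflow names ``coin_toss``, ``heads_branch``, ``tails_branch``, and ``branch_on_bool`` are illustrative, not part of the patched file) shows how a conditional can branch on a task's boolean output via ``.is_true()``::

    import random

    from flytekit import conditional, task, workflow


    @task
    def coin_toss(seed: int) -> bool:
        # a seeded, biased coin: heads roughly 70% of the time
        random.seed(seed)
        return random.random() < 0.7


    @task
    def heads_branch() -> int:
        return 1


    @task
    def tails_branch() -> int:
        return 0


    @workflow
    def branch_on_bool(seed: int = 5) -> int:
        outcome = coin_toss(seed=seed)
        return (
            conditional("coin_branch")
            .if_(outcome.is_true())
            .then(heads_branch())
            .else_()
            .then(tails_branch())
        )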
@@ -179,7 +179,9 @@ def bool_input_wf(b: bool) -> int: print("Running basic_boolean_wf a few times") for i in range(0, 5): print(f"Basic boolean wf output {basic_boolean_wf()}") - print(f"Boolean input {True if i < 2 else False}, workflow output {bool_input_wf(b=True if i < 2 else False)}") + print( + f"Boolean input {True if i < 2 else False}, workflow output {bool_input_wf(b=True if i < 2 else False)}" + ) # %% @@ -192,20 +194,20 @@ def bool_input_wf(b: bool) -> int: def nested_conditions(my_input: float) -> float: return ( conditional("fractions") - .if_((my_input > 0.1) & (my_input < 1.0)) - .then( + .if_((my_input > 0.1) & (my_input < 1.0)) + .then( conditional("inner_fractions") - .if_(my_input < 0.5) - .then(double(n=my_input)) - .elif_((my_input > 0.5) & (my_input < 0.7)) - .then(square(n=my_input)) - .else_() - .fail("Only <0.7 allowed") - ) - .elif_((my_input > 1.0) & (my_input < 10.0)) + .if_(my_input < 0.5) + .then(double(n=my_input)) + .elif_((my_input > 0.5) & (my_input < 0.7)) .then(square(n=my_input)) .else_() - .then(double(n=my_input)) + .fail("Only <0.7 allowed") + ) + .elif_((my_input > 1.0) & (my_input < 10.0)) + .then(square(n=my_input)) + .else_() + .then(double(n=my_input)) ) @@ -246,10 +248,10 @@ def consume_outputs(my_input: float, seed: int = 5) -> float: is_heads = coin_toss(seed=seed) res = ( conditional("double_or_square") - .if_(is_heads.is_true()) - .then(square(n=my_input)) - .else_() - .then(calc_sum(a=my_input, b=my_input)) + .if_(is_heads.is_true()) + .then(square(n=my_input)) + .else_() + .then(calc_sum(a=my_input, b=my_input)) ) # Regardless of the result, call ``double`` before @@ -261,6 +263,8 @@ def consume_outputs(my_input: float, seed: int = 5) -> float: # The workflow can be executed locally. if __name__ == "__main__": print( - f"consume_outputs(0.4) with default seed=5. This should return output of calc_sum => {consume_outputs(my_input=0.4)}") + f"consume_outputs(0.4) with default seed=5. This should return output of calc_sum => {consume_outputs(my_input=0.4)}" + ) print( - f"consume_outputs(0.4, seed=7), this should return output of square => {consume_outputs(my_input=0.4, seed=7)}") + f"consume_outputs(0.4, seed=7), this should return output of square => {consume_outputs(my_input=0.4, seed=7)}" + ) diff --git a/cookbook/core/control_flow/dynamics.py b/cookbook/core/control_flow/dynamics.py index 1b16c89199..6e3fae6e88 100644 --- a/cookbook/core/control_flow/dynamics.py +++ b/cookbook/core/control_flow/dynamics.py @@ -28,6 +28,7 @@ from flytekit import dynamic, task, workflow + # %% # Next, we write a task that returns the index of a character (A-Z/a-z is equivalent to 0 to 25). @task @@ -79,8 +80,8 @@ def derive_count(freq1: typing.List[int], freq2: typing.List[int]) -> int: # At execution (run) time, Flytekit runs the compilation step, and produces # a ``WorkflowTemplate`` (from the dynamic workflow), which Flytekit then passes back to Flyte Propeller for further running, exactly how sub-workflows are handled. # -# .. note:: -# The dynamic pattern isn't the most efficient method to iterate over a list. `Map tasks `_ +# .. note:: +# The dynamic pattern isn't the most efficient method to iterate over a list. `Map tasks `_ # might be more efficient in certain cases. But they only work for Python tasks (tasks decorated with the @task decorator) not SQL/Spark/etc,. # # We now define a dynamic workflow that encapsulates the above mentioned points. 
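For comparison with the dynamic version that follows, the map-task alternative mentioned in the note above can look roughly like this sketch (``double`` and ``map_wf`` are illustrative names, assuming flytekit's ``map_task`` helper)::

    from typing import List

    from flytekit import map_task, task, workflow


    @task
    def double(n: int) -> int:
        return n * 2


    @workflow
    def map_wf(numbers: List[int]) -> List[int]:
        # fans `double` out over every element; the backend can run the copies in parallel
        return map_task(double)(n=numbers)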
@@ -91,21 +92,21 @@ def count_characters(s1: str, s2: str) -> int: # s1 and s2 are accessible - # initiliaze an empty list consisting of 26 empty slots corresponding to every alphabet (lower and upper case) + # initialize an empty list consisting of 26 empty slots corresponding to every alphabet (lower and upper case) freq1 = [0] * 26 freq2 = [0] * 26 # looping through the string s1 for i in range(len(s1)): - # index and freq1 are not accesible as they are promises + # index and freq1 are not accessible as they are promises index = return_index(character=s1[i]) freq1 = update_list(freq_list=freq1, list_index=index) # looping through the string s2 for i in range(len(s2)): - # index and freq2 are not accesible as they are promises + # index and freq2 are not accessible as they are promises index = return_index(character=s2[i]) freq2 = update_list(freq_list=freq2, list_index=index) diff --git a/cookbook/core/control_flow/merge_sort.py b/cookbook/core/control_flow/merge_sort.py index 59a603607f..ebda6a9ffd 100644 --- a/cookbook/core/control_flow/merge_sort.py +++ b/cookbook/core/control_flow/merge_sort.py @@ -15,9 +15,9 @@ import typing from datetime import datetime from random import random, seed +from typing import Tuple from flytekit import conditional, dynamic, task, workflow -from typing import Tuple # seed random number generator seed(datetime.now().microsecond) @@ -26,11 +26,12 @@ # %% # A simple split function that divides a list into two halves. + @task def split(numbers: typing.List[int]) -> Tuple[typing.List[int], typing.List[int], int]: return ( - numbers[0:int(len(numbers) / 2)], - numbers[int(len(numbers) / 2):], + numbers[0 : int(len(numbers) / 2)], + numbers[int(len(numbers) / 2) :], int(len(numbers) / 2), ) @@ -124,8 +125,7 @@ def generate_inputs(numbers_count: int) -> typing.List[int]: # %% # The entire workflow can be executed locally as follows... if __name__ == "__main__": - print(f"Running Merge Sort Locally...") count = 20 x = generate_inputs(count) print(x) - print(merge_sort(numbers=x, numbers_count=count)) + print(f"Running Merge Sort Locally...{merge_sort(numbers=x, numbers_count=count)}") diff --git a/cookbook/core/control_flow/subworkflows.py b/cookbook/core/control_flow/subworkflows.py index 30aad652fa..d4fbf93ca1 100644 --- a/cookbook/core/control_flow/subworkflows.py +++ b/cookbook/core/control_flow/subworkflows.py @@ -2,21 +2,21 @@ Subworkflows ------------ -Subworkflows are similar to :ref:`launch plans `, since they allow users to kick off one workflow from inside another. +Subworkflows are similar to :ref:`launch plans `, since they allow users to kick off one workflow from inside another. -What's the difference? +What's the difference? Think of launch plans as pass by pointer and subworkflows as pass by value. .. note:: - The reason why subworkflows exist is that this is how Flyte handles dynamic workflows. + The reason why subworkflows exist is that this is how Flyte handles dynamic workflows. Instead of hiding this functionality, we expose it at the user level. There are pros and cons of using subworkflows as described below. When Should I Use SubWorkflows? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you want to limit parallelism within a workflow and its launched sub-flows, subworkflows provide a clean way -to achieve that because they execute within the same context of the parent workflow. +to achieve that because they execute within the same context of the parent workflow. 
Thus, all nodes of a subworkflow are constrained to the total constraint on the parent workflow. Consider this: When you include Workflow A as a subworkflow of Workflow B, and when Workflow B is run, the entire graph of workflow A is @@ -31,6 +31,7 @@ # We import the required dependencies into the environment. import typing from typing import Tuple + from flytekit import task, workflow # %% @@ -38,10 +39,12 @@ # We usually try and define ``NamedTuple`` as a distinct type as a best practice (although it can be defined inline). op = typing.NamedTuple("OutputsBC", t1_int_output=int, c=str) + @task def t1(a: int) -> op: return op(a + 2, "world") + # %% # Then we define a subworkflow like a typical workflow that can run like any other workflow. @workflow @@ -50,19 +53,21 @@ def my_subwf(a: int = 42) -> Tuple[str, str]: u, v = t1(a=x) return y, v + # %% # We call the workflow declared above in a `parent` workflow below -# which showcases how to override the node name of a task (or subworkflow in this case). -# +# which showcases how to override the node name of a task (or subworkflow in this case). +# # Typically, nodes are just named sequentially: ``n0``, ``n1``, and so on. Since the inner ``my_subwf`` also has a ``n0``, you may # wish to change the name of the first one. Not changing the name is fine because Flyte automatically prepends an attribute -# to the inner ``n0`` since node IDs must be distinct within a workflow graph. +# to the inner ``n0`` since node IDs must be distinct within a workflow graph. @workflow def parent_wf(a: int) -> Tuple[int, str, str]: x, y = t1(a=a).with_overrides(node_name="node-t1-parent") u, v = my_subwf(a=x) return x, u, v + # %% # .. note:: # The with_overrides method provides a new name to the graph-node for better rendering or readability. @@ -95,28 +100,29 @@ def nested_parent_wf(a: int) -> Tuple[int, str, str, str]: # # When launch plans are used within a workflow to launch the execution of a previously defined workflow, a new # external execution is launched, with a separate execution ID and can be observed as a distinct entity in -# FlyteConsole/Flytectl. +# FlyteConsole/Flytectl. # # They may have separate parallelism constraints since the context is not shared. # We refer to such external invocations of a workflow using launch plans from a parent workflow as ``External Workflows``. # # .. tip:: -# +# # If your deployment uses :ref:`multicluster-setup `, then external workflows may allow you to distribute the workload of a workflow to multiple clusters. # # Here is an example demonstrating external workflows: # %% # We import the required dependencies into the environment. -import typing -from typing import Tuple, Dict -from flytekit import dynamic,task, workflow -from flytekit import conditional, task, workflow, LaunchPlan +import typing # noqa: E402 +from collections import Counter # noqa: E402 +from typing import Dict, Tuple # noqa: E402 -from collections import Counter +from flytekit import LaunchPlan, task, workflow # noqa: E402 # %% # We define a task that computes the frequency of every word in a string, and returns a dictionary mapping every word to its count. + + @task def count_freq_words(input_string1: str) -> Dict: # input_string = "The cat sat on the mat" @@ -124,6 +130,7 @@ def count_freq_words(input_string1: str) -> Dict: wordCount = dict(Counter(words)) return wordCount + # %% # We define a workflow that executes the previously defined task. 
@workflow @@ -131,27 +138,35 @@ def ext_workflow(my_input: str) -> Dict: result = count_freq_words(input_string1=my_input) return result + # %% # Next, we create a launch plan. -external_lp = LaunchPlan.get_or_create(ext_workflow, "parent_workflow_execution",) +external_lp = LaunchPlan.get_or_create( + ext_workflow, + "parent_workflow_execution", +) # %% # We define another task that returns the repeated keys (in our case, words) from a dictionary. + + @task def count_repetitive_words(word_counter: Dict) -> typing.List[str]: repeated_words = [key for key, value in word_counter.items() if value > 1] return repeated_words + # %% # We define a workflow that triggers the launch plan of the previously-defined workflow. @workflow def parent_workflow(my_input1: str) -> typing.List[str]: - my_op1 = external_lp(my_input=my_input1) - my_op2 = count_repetitive_words(word_counter = my_op1) + my_op1 = external_lp(my_input=my_input1) + my_op2 = count_repetitive_words(word_counter=my_op1) return my_op2 + # %% # Here, ``parent_workflow`` is an external workflow. This can be run locally too. if __name__ == "__main__": print("Running parent workflow...") - print(parent_workflow(my_input1= "the cat took the apple and ate the apple")) \ No newline at end of file + print(parent_workflow(my_input1="the cat took the apple and ate the apple")) diff --git a/cookbook/core/extend_flyte/backend_plugins.py b/cookbook/core/extend_flyte/backend_plugins.py index 9ca2917dfa..8633471f4c 100644 --- a/cookbook/core/extend_flyte/backend_plugins.py +++ b/cookbook/core/extend_flyte/backend_plugins.py @@ -29,7 +29,7 @@ This makes it possible to extend a task-template beyond the default supported targets -- :std:ref:`container ` (WIP, sql etc). The motivation of the Custom field, is to marshal a JSON structure that specifies information beyond what a regular TaskTemplate can capture. The actual structure of the JSON is known only to the implemented backend-plugin and the SDK components. The core Flyte platform, does not understand of look into the specifics of this structure. -It is highly recommended to use an interface definition lanugage like Protobuf, OpenAPISpec etc to declare specify the structure of the JSON. From here, on we refer to this as the ``Plugin Specification``. +It is highly recommended to use an interface definition language like Protobuf, OpenAPISpec etc to declare specify the structure of the JSON. From here, on we refer to this as the ``Plugin Specification``. For Spark we decided to use Protobuf to specify the plugin as can be seen `here `__. Note it is not necessary to have the Plugin structure specified in FlyteIDL, we do it for simplicity, ease of maintenance alongwith the core platform and because of existing tooling to generate code for protobuf. @@ -45,7 +45,7 @@ FlytePropeller backend Plugin ------------------------------ -The backend plugin is where the actual logic of the execution is implemented. The backend plugin uses Flyte - PluginMachinery inteface to implement a plugin which can be one of the following supported types +The backend plugin is where the actual logic of the execution is implemented. The backend plugin uses Flyte - PluginMachinery interface to implement a plugin which can be one of the following supported types #. A `Kubernetes operator Plugin `_. 
The following demo shows 2 examples of K8s backend plugins: Flytekit Athena & Spark, and Flyte K8s Pod & Spark: diff --git a/cookbook/core/extend_flyte/custom_task_plugin.py b/cookbook/core/extend_flyte/custom_task_plugin.py index d740c58b61..9f5f95e73b 100644 --- a/cookbook/core/extend_flyte/custom_task_plugin.py +++ b/cookbook/core/extend_flyte/custom_task_plugin.py @@ -6,7 +6,7 @@ Flytekit is designed to be extremely extensible. You can add new task-types that are useful only for your use-cases. Flyte does come with the capability of extending the backend, but that is only required if you want the capability to be -extended to all users of Flyte, or there is a cost/visibility benefit of doing so. +extended to all users of Flyte, or there is a cost/visibility benefit of doing so. The following demo shows how to build Flyte container task extensions, with an SQLAlchemy extension as an example: @@ -59,7 +59,10 @@ class WaitForObjectStoreFile(PythonTask): _VAR_NAME: str = "path" def __init__( - self, name: str, poll_interval: timedelta = timedelta(seconds=10), **kwargs, + self, + name: str, + poll_interval: timedelta = timedelta(seconds=10), + **kwargs, ): super(WaitForObjectStoreFile, self).__init__( task_type="object-store-sensor", diff --git a/cookbook/core/extend_flyte/custom_types.py b/cookbook/core/extend_flyte/custom_types.py index fa0843fc34..fe0883fdc4 100644 --- a/cookbook/core/extend_flyte/custom_types.py +++ b/cookbook/core/extend_flyte/custom_types.py @@ -27,19 +27,10 @@ import typing from typing import Type -from flytekit import ( - Blob, - BlobMetadata, - BlobType, - FlyteContext, - Literal, - LiteralType, - Scalar, - task, - workflow, -) +from flytekit import Blob, BlobMetadata, BlobType, FlyteContext, Literal, LiteralType, Scalar, task, workflow from flytekit.extend import TypeEngine, TypeTransformer + # %% # .. note:: # ``FlyteContext`` is used to access a random local directory. diff --git a/cookbook/core/flyte_basics/basic_workflow.py b/cookbook/core/flyte_basics/basic_workflow.py index d83dc28233..587a4df827 100644 --- a/cookbook/core/flyte_basics/basic_workflow.py +++ b/cookbook/core/flyte_basics/basic_workflow.py @@ -17,9 +17,9 @@ Now, let's get started with a simple workflow. """ import typing +from typing import Tuple from flytekit import task, workflow -from typing import Tuple @task @@ -52,7 +52,7 @@ def my_wf(a: int, b: str) -> Tuple[int, str]: # workflow is executed. # # A workflow can be executed locally where the evaluation will happen immediately, or using the CLI, UI, etc., which will trigger an evaluation. -# Although Flyte workflows decorated with ``@workflow`` look like Python functions, they are actually python-esque, Domain Specific Language (DSL) entities +# Although Flyte workflows decorated with ``@workflow`` look like Python functions, they are actually python-esque, Domain Specific Language (DSL) entities # that recognize the ``@task`` decorators. When a workflow encounters a ``@task``-decorated Python function, it creates a # :py:class:`flytekit.core.promise.Promise` object. This promise doesn't contain the actual output of the task, and is only fulfilled at execution time. 
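To make the promise behaviour concrete, here is a minimal sketch (``add_one`` and ``stringify`` are illustrative names, not part of the patched file) showing that a task output inside a workflow is a promise that can only be passed on to other tasks, not inspected with ordinary Python::

    from flytekit import task, workflow


    @task
    def add_one(a: int) -> int:
        return a + 1


    @task
    def stringify(a: int) -> str:
        return f"value: {a}"


    @workflow
    def promise_wf(a: int) -> str:
        x = add_one(a=a)  # x is a Promise here, not an int
        # `if x > 3:` would not work here; use flytekit's `conditional` for branching
        return stringify(a=x)  # promises flow between tasks as inputs and outputs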
# diff --git a/cookbook/core/flyte_basics/decorating_tasks.py b/cookbook/core/flyte_basics/decorating_tasks.py index fd33a61776..79f919fc09 100644 --- a/cookbook/core/flyte_basics/decorating_tasks.py +++ b/cookbook/core/flyte_basics/decorating_tasks.py @@ -14,7 +14,6 @@ from flytekit import task, workflow - logger = logging.getLogger(__file__) @@ -25,6 +24,7 @@ # # Here we define decorator that logs the input and output information of a decorated task. + def log_io(fn): @wraps(fn) def wrapper(*args, **kwargs): @@ -42,6 +42,7 @@ def wrapper(*args, **kwargs): # .. note:: # The order of invoking the decorators is important. ``@task`` should always be the outer-most decorator. + @task @log_io def t1(x: int) -> int: @@ -61,12 +62,15 @@ def t1(x: int) -> int: # .. note:: # The ``validate_output`` output uses :py:func:`~functools.partial` to implement parameterized decorators. + def validate_output(fn=None, *, floor=0): @wraps(fn) def wrapper(*args, **kwargs): out = fn(*args, **kwargs) if out <= floor: - raise ValueError(f"output of task {fn.__name__} must be a positive number, found {out}") + raise ValueError( + f"output of task {fn.__name__} must be a positive number, found {out}" + ) return out if fn is None: @@ -74,9 +78,11 @@ def wrapper(*args, **kwargs): return wrapper + # %% # Now let's define a function that uses both the logging and validator decorators: + @task @log_io @validate_output(floor=10) @@ -87,6 +93,7 @@ def t2(x: int) -> int: # %% # Finally, we compose a workflow that calls ``t1`` and ``t2``. + @workflow def wf(x: int) -> int: return t2(x=t1(x=x)) diff --git a/cookbook/core/flyte_basics/decorating_workflows.py b/cookbook/core/flyte_basics/decorating_workflows.py index 47b891b83d..88031a8714 100644 --- a/cookbook/core/flyte_basics/decorating_workflows.py +++ b/cookbook/core/flyte_basics/decorating_workflows.py @@ -20,9 +20,10 @@ """ from functools import partial, wraps +from unittest.mock import MagicMock import flytekit -from flytekit import task, workflow, FlyteContextManager +from flytekit import FlyteContextManager, task, workflow from flytekit.core.node_creation import create_node # %% @@ -30,7 +31,6 @@ # :py:class:`unittest.mock.MagicMock` class to create a fake external service that we want to initialize at the # beginning of our workflow and finish at the end. -from unittest.mock import MagicMock external_service = MagicMock() @@ -57,6 +57,7 @@ def teardown(): # # Next we create the decorator that we'll use to wrap our workflow function. + def setup_teardown(fn=None, *, before, after): @wraps(fn) def wrapper(*args, **kwargs): @@ -93,6 +94,7 @@ def wrapper(*args, **kwargs): return wrapper + # %% # There are a few key pieces to note in the ``setup_teardown`` decorator above: # @@ -112,6 +114,7 @@ def wrapper(*args, **kwargs): # # Now let's define two tasks that will constitute the workflow + @task def t1(x: float) -> float: return x - 1 @@ -119,11 +122,13 @@ def t1(x: float) -> float: @task def t2(x: float) -> float: - return x ** 2 + return x**2 + # %% # And then create our decorated workflow: + @workflow @setup_teardown(before=setup, after=teardown) def wf(x: float) -> float: diff --git a/cookbook/core/flyte_basics/documented_workflow.py b/cookbook/core/flyte_basics/documented_workflow.py index 10db71b512..e31d8fb125 100644 --- a/cookbook/core/flyte_basics/documented_workflow.py +++ b/cookbook/core/flyte_basics/documented_workflow.py @@ -17,6 +17,7 @@ from dataclasses_json import dataclass_json from flytekit import task, workflow + # %% # We define a dataclass. 
@dataclass_json @@ -67,7 +68,7 @@ def sphinx_docstring(df: pd.DataFrame, data: PandasData = PandasData()) -> pd.Da # The first block of the docstring is a one-liner about the workflow. # The second block of the docstring consists of a detailed description. # The third block of the docstring describes all the parameters along with their data types. -# The fourth block of the docstring descibes the return type along with its data type. +# The fourth block of the docstring describes the return type along with its data type. @workflow def numpy_docstring(df: pd.DataFrame, data: PandasData = PandasData()) -> pd.DataFrame: """ diff --git a/cookbook/core/flyte_basics/files.py b/cookbook/core/flyte_basics/files.py index 0da1942c97..3079452ffd 100644 --- a/cookbook/core/flyte_basics/files.py +++ b/cookbook/core/flyte_basics/files.py @@ -26,7 +26,6 @@ from flytekit import task, workflow from flytekit.types.file import FlyteFile - # %% # Next, we write a task that accepts a ``FlyteFile``, a list of column names, # and a list of column names to normalize, then outputs a csv file of only @@ -39,6 +38,7 @@ # ``FlyteFile[typing.TypeVar("jpeg")]``). The format is entirely optional, # and if not specified, defaults to ``""``. + @task def normalize_columns( csv_url: FlyteFile, @@ -48,7 +48,7 @@ def normalize_columns( ) -> FlyteFile: # read the data from the raw csv file parsed_data = defaultdict(list) - with open(csv_url, newline='\n') as input_file: + with open(csv_url, newline="\n") as input_file: reader = csv.DictReader(input_file, fieldnames=column_names) for row in (x for i, x in enumerate(reader) if i > 0): for column in columns_to_normalize: @@ -94,6 +94,7 @@ def normalize_columns( # the workflow. This is passed to the ``location`` input of the task. If it's not an empty string, the task attempts to # upload its file to that location. + @workflow def normalize_csv_file( csv_url: FlyteFile, @@ -125,7 +126,9 @@ def normalize_csv_file( ), ] print(f"Running {__file__} main...") - for index, (csv_url, column_names, columns_to_normalize) in enumerate(default_files): + for index, (csv_url, column_names, columns_to_normalize) in enumerate( + default_files + ): normalized_columns = normalize_csv_file( csv_url=csv_url, column_names=column_names, diff --git a/cookbook/core/flyte_basics/folders.py b/cookbook/core/flyte_basics/folders.py index 081c872c13..d4f565f2e4 100644 --- a/cookbook/core/flyte_basics/folders.py +++ b/cookbook/core/flyte_basics/folders.py @@ -30,13 +30,14 @@ def download_files(csv_urls: List[str]) -> FlyteDirectory: working_dir = flytekit.current_context().working_directory local_dir = Path(os.path.join(working_dir, "csv_files")) local_dir.mkdir(exist_ok=True) - + # get the number of digits needed to preserve the order of files in the local directory zfill_len = len(str(len(csv_urls))) for idx, remote_location in enumerate(csv_urls): local_image = os.path.join( # prefix the file name with the index location of the file in the original csv_urls list - local_dir, f"{str(idx).zfill(zfill_len)}_{os.path.basename(remote_location)}" + local_dir, + f"{str(idx).zfill(zfill_len)}_{os.path.basename(remote_location)}", ) urllib.request.urlretrieve(remote_location, local_image) return FlyteDirectory(path=str(local_dir)) @@ -50,6 +51,7 @@ def download_files(csv_urls: List[str]) -> FlyteDirectory: # demonstrates how Flyte tasks are simply entrypoints of execution, which can themselves call # other functions and routines that are written in pure python. 
+ def normalize_columns( local_csv_file: str, column_names: List[str], @@ -57,7 +59,7 @@ def normalize_columns( ): # read the data from the raw csv file parsed_data = defaultdict(list) - with open(local_csv_file, newline='\n') as input_file: + with open(local_csv_file, newline="\n") as input_file: reader = csv.DictReader(input_file, fieldnames=column_names) for row in (x for i, x in enumerate(reader) if i > 0): for column in columns_to_normalize: @@ -82,6 +84,7 @@ def normalize_columns( # Now we define a task that accepts the previously downloaded folder, along with some metadata about the # column names of each file in the directory and the column names that we want to normalize. + @task def normalize_all_files( csv_files_dir: FlyteDirectory, @@ -103,6 +106,7 @@ def normalize_all_files( # of url strings pointing to a remote location containing a csv file, a list of column names # associated with each csv file, and a list of columns that we want to normalize. + @workflow def download_and_normalize_csv_files( csv_urls: List[str], @@ -139,7 +143,4 @@ def download_and_normalize_csv_files( columns_metadata=columns_metadata, columns_to_normalize_metadata=columns_to_normalize_metadata, ) - print( - f"Running download_and_normalize_csv_files on {csv_urls}: " - f"{directory}" - ) + print(f"Running download_and_normalize_csv_files on {csv_urls}: " f"{directory}") diff --git a/cookbook/core/flyte_basics/lp.py b/cookbook/core/flyte_basics/lp.py index f442e1ce29..c21c58379e 100644 --- a/cookbook/core/flyte_basics/lp.py +++ b/cookbook/core/flyte_basics/lp.py @@ -52,7 +52,9 @@ def my_wf(val: int) -> int: # %% # It is possible to **fix** launch plan inputs, so that they can't be overridden at execution call time. -my_fixed_lp = LaunchPlan.get_or_create(name="always_2_lp", workflow=my_wf, fixed_inputs={"val": 4}) +my_fixed_lp = LaunchPlan.get_or_create( + name="always_2_lp", workflow=my_wf, fixed_inputs={"val": 4} +) square_2 = my_fixed_lp() # error: # square_1 = my_fixed_lp(val=1) diff --git a/cookbook/core/flyte_basics/named_outputs.py b/cookbook/core/flyte_basics/named_outputs.py index bdaa2f0205..aa4975a212 100644 --- a/cookbook/core/flyte_basics/named_outputs.py +++ b/cookbook/core/flyte_basics/named_outputs.py @@ -15,7 +15,6 @@ from flytekit import task, workflow - # %% # Named outputs can be declared inline as in the following task signature. # @@ -53,6 +52,7 @@ def say_hello() -> hello_output: # Note that we are de-referencing the individual task execution outputs because named-outputs use NamedTuple # which are tuples that need to be de-referenced. 
+ @workflow def my_wf() -> wf_outputs: return wf_outputs(say_hello().greet, say_hello().greet) diff --git a/cookbook/core/flyte_basics/shell_task.py b/cookbook/core/flyte_basics/shell_task.py index f2027b6918..01e6494c3b 100644 --- a/cookbook/core/flyte_basics/shell_task.py +++ b/cookbook/core/flyte_basics/shell_task.py @@ -16,7 +16,6 @@ from flytekit.types.directory import FlyteDirectory from flytekit.types.file import FlyteFile - t1 = ShellTask( name="task_1", debug=True, @@ -32,9 +31,7 @@ fi """, inputs=kwtypes(x=FlyteFile), - output_locs=[ - OutputLocation(var="i", var_type=FlyteFile, location="{inputs.x}") - ], + output_locs=[OutputLocation(var="i", var_type=FlyteFile, location="{inputs.x}")], ) diff --git a/cookbook/core/flyte_basics/task_cache.py b/cookbook/core/flyte_basics/task_cache.py index cfe66ccf87..6ab4317d86 100644 --- a/cookbook/core/flyte_basics/task_cache.py +++ b/cookbook/core/flyte_basics/task_cache.py @@ -16,11 +16,16 @@ """ +import time + +import pandas + # %% -# +# # For any :py:func:`flytekit.task` in Flyte, there is always one required import, which is: -from flytekit import task - +from flytekit import HashMethod, task, workflow +from flytekit.core.node_creation import create_node +from typing_extensions import Annotated # %% # Task caching is disabled by default to avoid unintended consequences of caching tasks with side effects. To enable caching and control its behavior, use the ``cache`` and ``cache_version`` parameters when constructing a task. @@ -28,6 +33,8 @@ # ``cache_version`` field indicates that the task functionality has changed. # Bumping the ``cache_version`` is akin to invalidating the cache. # Flyte users can manually update this version and Flyte will cache the next execution instead of relying on the old cache. 
+ + @task(cache=True, cache_version="1.0") def square(n: int) -> int: """ @@ -152,30 +159,29 @@ def square(n: int) -> int: # %% # Here's a complete example of the feature: -import pandas -import time -from typing_extensions import Annotated - -from flytekit import HashMethod, workflow -from flytekit.core.node_creation import create_node - def hash_pandas_dataframe(df: pandas.DataFrame) -> str: return str(pandas.util.hash_pandas_object(df)) + @task -def uncached_data_reading_task() -> Annotated[pandas.DataFrame, HashMethod(hash_pandas_dataframe)]: +def uncached_data_reading_task() -> Annotated[ + pandas.DataFrame, HashMethod(hash_pandas_dataframe) +]: return pandas.DataFrame({"column_1": [1, 2, 3]}) + @task(cache=True, cache_version="1.0") def cached_data_processing_task(df: pandas.DataFrame) -> pandas.DataFrame: time.sleep(1) return df * 2 + @task def compare_dataframes(df1: pandas.DataFrame, df2: pandas.DataFrame): assert df1.equals(df2) + @workflow def cached_dataframe_wf(): raw_data = uncached_data_reading_task() @@ -192,5 +198,5 @@ def cached_dataframe_wf(): if __name__ == "__main__": - print(f"Running cached_dataframe_wf once") df1 = cached_dataframe_wf() + print(f"Running cached_dataframe_wf once : {df1}") diff --git a/cookbook/core/flyte_basics/task_cache_serialize.py b/cookbook/core/flyte_basics/task_cache_serialize.py index 09ba37b5f7..8235e0b9f8 100644 --- a/cookbook/core/flyte_basics/task_cache_serialize.py +++ b/cookbook/core/flyte_basics/task_cache_serialize.py @@ -12,7 +12,7 @@ """ # %% -# +# # For any :py:func:`flytekit.task` in Flyte, there is always one required import, which is: from flytekit import task @@ -20,7 +20,7 @@ # %% # Task cache serializing is disabled by default to avoid unexpected behavior for task executions. To enable use the ``cache_serialize`` parameter. # ``cache_serialize`` is a switch to enable or disable serialization of the task -# This operation is only useful for cachable tasks, where one may reuse output from a previous execution. Flyte requires implicitly enabling the ``cache`` parameter on all cache serializable tasks. +# This operation is only useful for cacheable tasks, where one may reuse output from a previous execution. Flyte requires implicitly enabling the ``cache`` parameter on all cache serializable tasks. # Cache key definitions follow the same rules as non-serialized cache tasks. It is important to understand the implications of the task signature and ``cache_version`` parameter in defining cached results. @task(cache=True, cache_serialize=True, cache_version="1.0") def square(n: int) -> int: @@ -35,6 +35,7 @@ def square(n: int) -> int: """ return n * n + # %% # In the above example calling `square(n=2)` multiple times concurrently (even in different executions or workflows) will only execute the multiplication operation once. # Concurrently evaluated tasks will wait for completion of the first instance before reusing the cached results and subsequent evaluations will instantly reuse existing cache results. @@ -45,7 +46,7 @@ def square(n: int) -> int: # # The cache serialize paradigm introduces a new artifact reservation system. Tasks may use this reservation system to acquire an artifact reservation, indicating that they are actively evaluating the task, and release the reservation, once the execution is completed. Flyte uses a clock-skew algorithm to define reservation timeouts. Therefore, tasks are required to periodically extend the reservation during execution. 
# -# The first execution of a serializable cached task will successfully acquire the artifact reservation. Execution will be performed as usual and upon completion, the results are written to the cache and reservation is released. Concurrently executed task instances (i.e. in parallel with the initial execution) will observe an active reservation, in which case the execution will wait until the next reevaluation and perform another check. Once the initial execution completes it will reuse the cached results. Subsequently executed task instances (i.e. after an execution has already completed successfully) will immediately reuse the existing cached results. +# The first execution of a serializable cached task will successfully acquire the artifact reservation. Execution will be performed as usual and upon completion, the results are written to the cache and reservation is released. Concurrently executed task instances (i.e. in parallel with the initial execution) will observe an active reservation, in which case the execution will wait until the next reevaluation and perform another check. Once the initial execution completes it will reuse the cached results. Subsequently executed task instances (i.e. after an execution has already completed successfully) will immediately reuse the existing cached results. # # Flyte handles task execution failures using a timeout on the reservation. If the task currently holding the reservation fails to extend it before it times out, another task may acquire the reservation and begin executing the task. -# +# diff --git a/cookbook/core/type_system/enums.py b/cookbook/core/type_system/enums.py index 4df6daf66b..eb72c524e7 100644 --- a/cookbook/core/type_system/enums.py +++ b/cookbook/core/type_system/enums.py @@ -16,9 +16,10 @@ and the Enum types are not optional. So when defining enums, design them well to always make the first value as a valid default. """ -from flytekit import task, workflow -from typing import Tuple from enum import Enum +from typing import Tuple + +from flytekit import task, workflow # %% diff --git a/cookbook/core/type_system/flyte_pickle.py b/cookbook/core/type_system/flyte_pickle.py index 61bc9c3fad..00a7033706 100644 --- a/cookbook/core/type_system/flyte_pickle.py +++ b/cookbook/core/type_system/flyte_pickle.py @@ -54,4 +54,4 @@ def welcome(name: str) -> People: This workflow can be run locally. During local execution also, the custom object (People) will be marshalled to and from python pickle. """ - welcome(name='Foo') + welcome(name="Foo") diff --git a/cookbook/core/type_system/structured_dataset.py b/cookbook/core/type_system/structured_dataset.py index 2761f56d8f..65db0efa16 100644 --- a/cookbook/core/type_system/structured_dataset.py +++ b/cookbook/core/type_system/structured_dataset.py @@ -23,19 +23,18 @@ import pandas as pd import pyarrow as pa import pyarrow.parquet as pq - -from flytekit import task, workflow, kwtypes, FlyteContext, StructuredDatasetType +from flytekit import FlyteContext, StructuredDatasetType, kwtypes, task, workflow from flytekit.models import literals from flytekit.models.literals import StructuredDatasetMetadata from flytekit.types.schema import FlyteSchema from flytekit.types.structured.structured_dataset import ( + LOCAL, + PARQUET, + S3, StructuredDataset, - StructuredDatasetEncoder, StructuredDatasetDecoder, + StructuredDatasetEncoder, StructuredDatasetTransformerEngine, - PARQUET, - S3, - LOCAL, ) try: @@ -161,6 +160,8 @@ def decode( # %% # Let's define a task to test the above functionality. 
# We open a structured dataset of type ``numpy.ndarray`` and serialize it again. + + @task def to_numpy( ds: Annotated[StructuredDataset, subset_cols] @@ -193,6 +194,6 @@ def schema_compatibility_wf(a: int) -> Annotated[StructuredDataset, subset_cols] # You can run the code locally as follows: if __name__ == "__main__": numpy_array_one = pandas_compatibility_wf(a=42).open(np.ndarray).all() - print(f"pandas DataFrame compatibility check output: ", numpy_array_one) + print(f"pandas DataFrame compatibility check output: {numpy_array_one}") numpy_array_two = schema_compatibility_wf(a=42).open(np.ndarray).all() - print(f"Schema compatibility check output: ", numpy_array_two) + print(f"Schema compatibility check output: {numpy_array_two}") diff --git a/cookbook/core/type_system/typed_schema.py b/cookbook/core/type_system/typed_schema.py index b9653e241f..caed2b8c61 100644 --- a/cookbook/core/type_system/typed_schema.py +++ b/cookbook/core/type_system/typed_schema.py @@ -11,7 +11,7 @@ from flytekit import kwtypes, task, workflow # %% -# Flytekit consists of some pre-built type extenstions, one of them is the FlyteSchema type +# Flytekit consists of some pre-built type extensions, one of them is the FlyteSchema type from flytekit.types.schema import FlyteSchema # %% @@ -52,7 +52,7 @@ def wf() -> FlyteSchema[kwtypes(x=int)]: # %% -# Local execution will convert the data to and from the serialized representation thus, mimicing a complete distributed +# Local execution will convert the data to and from the serialized representation thus, mimicking a complete distributed # execution. if __name__ == "__main__": print(f"Running {__file__} main...") diff --git a/cookbook/deployment/configure_use_gpus.py b/cookbook/deployment/configure_use_gpus.py index a690898065..7a7eb03369 100644 --- a/cookbook/deployment/configure_use_gpus.py +++ b/cookbook/deployment/configure_use_gpus.py @@ -29,4 +29,4 @@ operator: "Equal" value: "value1" effect: "NoSchedule" -""" \ No newline at end of file +""" diff --git a/cookbook/deployment/deploying_workflows.py b/cookbook/deployment/deploying_workflows.py index 6abd569928..bba3d9f96c 100644 --- a/cookbook/deployment/deploying_workflows.py +++ b/cookbook/deployment/deploying_workflows.py @@ -19,7 +19,7 @@ - Use caching to avoid calling the same task with the same inputs (for the same version) - Portability: You can reference pre-registered entities under any domain or project within your workflow code -- Sharable executions: you can easily share links to your executions with your teammates +- Shareable executions: you can easily share links to your executions with your teammates Please refer to the :doc:`Getting Started ` for details on getting started with the Flyte installation. @@ -99,7 +99,7 @@ In-container serialization ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Notice that the commands above are run locally, _not_ inside the container. Strictly speaking, to be rigourous, serialization should be done within the container for the following reasons. +Notice that the commands above are run locally, _not_ inside the container. Strictly speaking, to be rigorous, serialization should be done within the container for the following reasons. 1. It ensures that the versions of all libraries used at execution time on the Flyte platform, are the same that are used during serialization. 2. Since serialization runs part of flytekit, it helps ensure that your container is set up correctly. 
diff --git a/cookbook/deployment/lp_notifications.py b/cookbook/deployment/lp_notifications.py index c486a834fa..d993c91371 100644 --- a/cookbook/deployment/lp_notifications.py +++ b/cookbook/deployment/lp_notifications.py @@ -22,9 +22,11 @@ # the :py:class:`flytekit:flytekit.Email`, :py:class:`flytekit:flytekit.PagerDuty`, or :py:class:`flytekit:flytekit.Slack` # objects can be used in the construction of a :py:class:`flytekit:flytekit.LaunchPlan`. +from datetime import timedelta + # %% # Consider the following example workflow: -from flytekit import Email, LaunchPlan, task, workflow, WorkflowExecutionPhase +from flytekit import Email, FixedRate, LaunchPlan, PagerDuty, Slack, WorkflowExecutionPhase, task, workflow @task @@ -37,9 +39,10 @@ def int_doubler_wf(a: int) -> str: doubled = double_int_and_print(a=a) return doubled + # %% # Here are three scenarios that can help deepen your understanding of how notifications work: -# +# # 1. Launch Plan triggers email notifications when the workflow execution reaches the ``SUCCEEDED`` phase. int_doubler_wf_lp = LaunchPlan.get_or_create( name="int_doubler_wf", @@ -55,9 +58,7 @@ def int_doubler_wf(a: int) -> str: # %% # 2. Notifications shine when used for scheduled workflows to alert for failures. -from datetime import timedelta -from flytekit import FixedRate, PagerDuty int_doubler_wf_scheduled_lp = LaunchPlan.get_or_create( name="int_doubler_wf_scheduled", @@ -75,7 +76,6 @@ def int_doubler_wf(a: int) -> str: # %% # 3. Notifications can be combined with different permutations of terminal phases and recipient targets. -from flytekit import Slack wacky_int_doubler_lp = LaunchPlan.get_or_create( name="wacky_int_doubler", @@ -122,7 +122,7 @@ def int_doubler_wf(a: int) -> str: # ====== # # To publish notifications, you'll need to set up an `SNS topic `_. -# +# # To process notifications, you'll need to set up an `AWS SQS `_ queue to consume notification events. This queue must be configured as a subscription to your SNS topic you created above. # # To publish notifications, you'll need a `verified SES email address `_ which will be used to send notification emails and alerts using email APIs. @@ -134,7 +134,7 @@ def int_doubler_wf(a: int) -> str: # .. code-block:: bash # # notifications: -# type: "aws" +# type: "aws" # noqa: F821 # region: "us-east-1" # publisher: # topicName: "arn:aws:sns:us-east-1:{{ YOUR ACCOUNT ID }}:{{ YOUR TOPIC }}" @@ -162,4 +162,3 @@ def int_doubler_wf(a: int) -> str: # * **body**: Configurable email body used in notifications. # # The complete set of parameters that can be used for email templating are checked in `here `_. 
- diff --git a/cookbook/dev-requirements.in b/cookbook/dev-requirements.in index 21792ba3a7..f6a42a5e3c 100644 --- a/cookbook/dev-requirements.in +++ b/cookbook/dev-requirements.in @@ -11,11 +11,16 @@ -r ./case_studies/ml_training/mnist_classifier/requirements.in -r ./case_studies/ml_training/pima_diabetes/requirements.in -black==19.10b0 +black==22.3.0 coverage flake8 +pre-commit flake8-black flake8-isort +codespell isort mock pytest +mypy +google-cloud-bigquery +google-cloud-bigquery-storage \ No newline at end of file diff --git a/cookbook/dev-requirements.txt b/cookbook/dev-requirements.txt index b600cb7d7f..7fdd1266a5 100644 --- a/cookbook/dev-requirements.txt +++ b/cookbook/dev-requirements.txt @@ -1,746 +1,172 @@ # -# This file is autogenerated by pip-compile with python 3.8 +# This file is autogenerated by pip-compile with python 3.9 # To update, run: # -# /Library/Developer/CommandLineTools/usr/bin/make dev-requirements.txt +# pip-compile dev-requirements.in # -absl-py==0.12.0 - # via - # tensorboard - # tensorflow - # tensorflow-datasets - # tensorflow-metadata -altair==4.1.0 - # via great-expectations -appdirs==1.4.4 - # via black -appnope==0.1.2 - # via - # ipykernel - # ipython -argon2-cffi==21.1.0 - # via notebook -astunparse==1.6.3 - # via tensorflow -attrs==21.2.0 - # via - # black - # hypothesis - # jsonschema - # pytest - # scantree - # tensorflow-datasets -backcall==0.2.0 - # via ipython -backports.zoneinfo==0.2.1 - # via tzlocal -bcrypt==3.2.0 - # via paramiko -black==19.10b0 +attrs==21.4.0 + # via pytest +black==22.3.0 # via # -r dev-requirements.in # flake8-black -bleach==4.1.0 - # via nbconvert -boto3==1.18.56 - # via sagemaker-training -botocore==1.21.56 - # via - # boto3 - # s3transfer -cachetools==4.2.4 +cachetools==5.0.0 # via google-auth -certifi==2021.5.30 - # via - # kubernetes - # requests - # sentry-sdk -cffi==1.14.6 - # via - # argon2-cffi - # bcrypt - # cryptography - # pynacl -charset-normalizer==2.0.6 +certifi==2021.10.8 # via requests -clang==5.0 - # via tensorflow -click==7.1.2 - # via - # black - # flytekit - # great-expectations - # wandb -configparser==5.0.2 - # via wandb -coverage==6.0 +cfgv==3.3.1 + # via pre-commit +charset-normalizer==2.0.12 + # via requests +click==8.1.2 + # via black +codespell==2.1.0 # via -r dev-requirements.in -croniter==1.0.15 - # via flytekit -cryptography==35.0.0 - # via paramiko -cycler==0.10.0 - # via matplotlib -dataclasses-json==0.5.6 - # via - # dolt-integrations - # flytekit -debugpy==1.5.0 - # via ipykernel -decorator==5.1.0 - # via - # ipython - # retry -defusedxml==0.7.1 - # via nbconvert -deprecated==1.2.13 - # via flytekit -dill==0.3.4 - # via tensorflow-datasets -dirhash==0.2.1 - # via flytekit -diskcache==5.2.1 - # via flytekit -docker-image-py==0.1.12 - # via flytekit -docker-pycreds==0.4.0 - # via wandb -docstring-parser==0.11 - # via flytekit -dolt-integrations==0.1.5 - # via flytekitplugins.dolt -doltcli==0.1.15 - # via dolt-integrations -entrypoints==0.3 - # via - # altair - # jupyter-client - # nbconvert -flake8==3.9.2 +coverage==6.3.2 + # via -r dev-requirements.in +distlib==0.3.4 + # via virtualenv +filelock==3.6.0 + # via virtualenv +flake8==4.0.1 # via # -r dev-requirements.in # flake8-black # flake8-isort -flake8-black==0.2.3 +flake8-black==0.3.2 # via -r dev-requirements.in -flake8-isort==4.0.0 +flake8-isort==4.1.1 # via -r dev-requirements.in -flatbuffers==1.12 - # via tensorflow -flyteidl==0.20.2 - # via flytekit -flytekit==0.22.2 - # via - # -r 
./case_studies/ml_training/house_price_prediction/../../../common/requirements-common.in - # -r ./case_studies/ml_training/mnist_classifier/../../../common/requirements-common.in - # -r ./case_studies/ml_training/pima_diabetes/../../../common/requirements-common.in - # -r ./integrations/aws/sagemaker_pytorch/../../../common/requirements-common.in - # -r ./integrations/aws/sagemaker_training/../../../common/requirements-common.in - # -r ./integrations/external_services/hive/../../../common/requirements-common.in - # -r ./integrations/flytekit_plugins/dolt/../../../common/requirements-common.in - # -r ./integrations/flytekit_plugins/pandera_examples/../../../common/requirements-common.in - # -r ./integrations/kubernetes/k8s_spark/../../../common/requirements-common.in - # -r ./integrations/kubernetes/kfpytorch/../../../common/requirements-common.in - # -r ./integrations/kubernetes/pod/../../../common/requirements-common.in - # flytekitplugins-awssagemaker - # flytekitplugins-hive - # flytekitplugins-kfpytorch - # flytekitplugins-pandera - # flytekitplugins-pod - # flytekitplugins-spark - # flytekitplugins.dolt -flytekitplugins-awssagemaker==0.22.3 +google-api-core[grpc]==2.7.3 # via - # -r ./integrations/aws/sagemaker_pytorch/requirements.in - # -r ./integrations/aws/sagemaker_training/requirements.in -flytekitplugins-hive==0.22.3 - # via -r ./integrations/external_services/hive/requirements.in -flytekitplugins-kfpytorch==0.22.3 - # via -r ./integrations/kubernetes/kfpytorch/requirements.in -flytekitplugins-pandera==0.22.3 - # via -r ./integrations/flytekit_plugins/pandera_examples/requirements.in -flytekitplugins-pod==0.22.3 - # via -r ./integrations/kubernetes/pod/requirements.in -flytekitplugins-spark==0.22.3 - # via -r ./integrations/kubernetes/k8s_spark/requirements.in -flytekitplugins.dolt==0.22.3 - # via -r ./integrations/flytekit_plugins/dolt/requirements.in -future==0.18.2 - # via tensorflow-datasets -gast==0.4.0 - # via tensorflow -gevent==21.8.0 - # via sagemaker-training -gitdb==4.0.7 - # via gitpython -gitpython==3.1.24 - # via wandb -google-auth==1.35.0 + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-core +google-auth==2.6.6 # via - # google-auth-oauthlib - # kubernetes - # tensorboard -google-auth-oauthlib==0.4.6 - # via tensorboard -google-pasta==0.2.0 - # via tensorflow -googleapis-common-protos==1.53.0 - # via tensorflow-metadata -great-expectations==0.13.36 - # via -r ./integrations/flytekit_plugins/dolt/requirements.in -greenlet==1.1.2 - # via gevent -grpcio==1.41.0 + # google-api-core + # google-cloud-core +google-cloud-bigquery==3.0.1 + # via -r dev-requirements.in +google-cloud-bigquery-storage==2.13.1 # via - # flytekit - # tensorboard - # tensorflow -h5py==3.1.0 - # via tensorflow -hypothesis==6.23.1 - # via -r ./integrations/flytekit_plugins/pandera_examples/requirements.in -idna==3.2 + # -r dev-requirements.in + # google-cloud-bigquery +google-cloud-core==2.3.0 + # via google-cloud-bigquery +google-crc32c==1.3.0 + # via google-resumable-media +google-resumable-media==2.3.2 + # via google-cloud-bigquery +googleapis-common-protos==1.56.0 + # via + # google-api-core + # grpcio-status +grpcio==1.44.0 + # via + # google-api-core + # google-cloud-bigquery + # grpcio-status +grpcio-status==1.44.0 + # via google-api-core +identify==2.4.12 + # via pre-commit +idna==3.3 # via requests -importlib-metadata==4.8.1 - # via - # great-expectations - # keyring -importlib-resources==5.2.2 - # via tensorflow-datasets iniconfig==1.1.1 # via pytest 
-inotify_simple==1.2.1 - # via sagemaker-training -ipykernel==6.4.1 - # via - # ipywidgets - # notebook -ipython==7.28.0 - # via - # ipykernel - # ipywidgets -ipython-genutils==0.2.0 - # via - # ipykernel - # ipywidgets - # nbformat - # notebook -ipywidgets==7.6.5 - # via great-expectations -isort==5.9.3 +isort==5.10.1 # via # -r dev-requirements.in # flake8-isort -jedi==0.18.0 - # via ipython -jinja2==3.0.2 - # via - # altair - # great-expectations - # nbconvert - # notebook -jmespath==0.10.0 - # via - # boto3 - # botocore -joblib==1.0.1 - # via - # -r ./case_studies/ml_training/house_price_prediction/requirements.in - # -r ./case_studies/ml_training/pima_diabetes/requirements.in - # scikit-learn -jsonpatch==1.32 - # via great-expectations -jsonpointer==2.1 - # via jsonpatch -jsonschema==4.0.1 - # via - # altair - # great-expectations - # nbformat -jupyter-client==7.0.6 - # via - # ipykernel - # nbclient - # notebook -jupyter-core==4.8.1 - # via - # jupyter-client - # nbconvert - # nbformat - # notebook -jupyterlab-pygments==0.1.2 - # via nbconvert -jupyterlab-widgets==1.0.2 - # via ipywidgets -keras==2.6.0 - # via tensorflow -keras-preprocessing==1.1.2 - # via tensorflow -keyring==23.2.1 - # via flytekit -kiwisolver==1.3.2 - # via matplotlib -kubernetes==18.20.0 - # via flytekitplugins-pod -markdown==3.3.4 - # via tensorboard -markupsafe==2.0.1 - # via jinja2 -marshmallow==3.13.0 - # via - # dataclasses-json - # marshmallow-enum - # marshmallow-jsonschema -marshmallow-enum==1.5.1 - # via dataclasses-json -marshmallow-jsonschema==0.12.0 - # via flytekit -matplotlib==3.4.3 - # via - # -r ./case_studies/ml_training/house_price_prediction/../../../common/requirements-common.in - # -r ./case_studies/ml_training/house_price_prediction/requirements.in - # -r ./case_studies/ml_training/mnist_classifier/../../../common/requirements-common.in - # -r ./case_studies/ml_training/pima_diabetes/../../../common/requirements-common.in - # -r ./case_studies/ml_training/pima_diabetes/requirements.in - # -r ./integrations/aws/sagemaker_pytorch/../../../common/requirements-common.in - # -r ./integrations/aws/sagemaker_training/../../../common/requirements-common.in - # -r ./integrations/external_services/hive/../../../common/requirements-common.in - # -r ./integrations/flytekit_plugins/dolt/../../../common/requirements-common.in - # -r ./integrations/flytekit_plugins/pandera_examples/../../../common/requirements-common.in - # -r ./integrations/kubernetes/k8s_spark/../../../common/requirements-common.in - # -r ./integrations/kubernetes/kfpytorch/../../../common/requirements-common.in - # -r ./integrations/kubernetes/pod/../../../common/requirements-common.in -matplotlib-inline==0.1.3 - # via - # ipykernel - # ipython mccabe==0.6.1 # via flake8 -mistune==0.8.4 - # via - # great-expectations - # nbconvert mock==4.0.3 # via -r dev-requirements.in +mypy==0.950 + # via -r dev-requirements.in mypy-extensions==0.4.3 - # via typing-inspect -natsort==7.1.1 - # via flytekit -nbclient==0.5.4 - # via nbconvert -nbconvert==6.2.0 - # via notebook -nbformat==5.1.3 - # via - # ipywidgets - # nbclient - # nbconvert - # notebook -nest-asyncio==1.5.1 # via - # jupyter-client - # nbclient -notebook==6.4.4 - # via widgetsnbextension -numpy==1.19.5 - # via - # altair - # great-expectations - # h5py - # keras-preprocessing - # matplotlib - # opt-einsum - # pandas - # pandera - # pyarrow - # sagemaker-training - # scikit-learn - # scipy - # tensorboard - # tensorboardx - # tensorflow - # tensorflow-datasets - # torch - # torchvision 
- # xgboost -oauthlib==3.1.1 - # via requests-oauthlib -opt-einsum==3.3.0 - # via tensorflow -packaging==21.0 - # via - # bleach - # pandera + # black + # mypy +nodeenv==1.6.0 + # via pre-commit +numpy==1.21.6 + # via pyarrow +packaging==21.3 + # via + # google-cloud-bigquery # pytest -pandas==1.3.3 - # via - # altair - # dolt-integrations - # flytekit - # great-expectations - # pandera -pandera==0.7.2 - # via - # -r ./integrations/flytekit_plugins/pandera_examples/requirements.in - # flytekitplugins-pandera -pandocfilters==1.5.0 - # via nbconvert -paramiko==2.7.2 - # via sagemaker-training -parso==0.8.2 - # via jedi pathspec==0.9.0 + # via black +platformdirs==2.5.2 # via # black - # scantree -pathtools==0.1.2 - # via wandb -pexpect==4.8.0 - # via ipython -pickleshare==0.7.5 - # via ipython -pillow==8.3.2 - # via - # matplotlib - # torchvision + # virtualenv pluggy==1.0.0 # via pytest -prometheus-client==0.11.0 - # via notebook -promise==2.3 +pre-commit==2.18.1 + # via -r dev-requirements.in +proto-plus==1.20.3 # via - # tensorflow-datasets - # wandb -prompt-toolkit==3.0.20 - # via ipython -protobuf==3.18.1 + # google-cloud-bigquery + # google-cloud-bigquery-storage +protobuf==3.20.1 # via - # flyteidl - # flytekit + # google-api-core + # google-cloud-bigquery # googleapis-common-protos - # sagemaker-training - # tensorboard - # tensorboardx - # tensorflow - # tensorflow-datasets - # tensorflow-metadata - # wandb -psutil==5.8.0 - # via - # sagemaker-training - # wandb -ptyprocess==0.7.0 - # via - # pexpect - # terminado -py==1.10.0 - # via - # pytest - # retry -py4j==0.10.9 - # via pyspark -pyarrow==3.0.0 - # via - # flytekit - # pandera + # grpcio-status + # proto-plus +py==1.11.0 + # via pytest +pyarrow==6.0.1 + # via google-cloud-bigquery pyasn1==0.4.8 # via # pyasn1-modules # rsa pyasn1-modules==0.2.8 # via google-auth -pycodestyle==2.7.0 +pycodestyle==2.8.0 # via flake8 -pycparser==2.20 - # via cffi -pyflakes==2.3.1 +pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via - # ipython - # jupyterlab-pygments - # nbconvert -pynacl==1.4.0 - # via paramiko pyparsing==2.4.7 - # via - # great-expectations - # matplotlib - # packaging -pyrsistent==0.18.0 - # via jsonschema -pyspark==3.1.2 - # via - # -r ./integrations/kubernetes/k8s_spark/requirements.in - # flytekitplugins-spark -pytest==6.2.5 + # via packaging +pytest==7.1.2 # via -r dev-requirements.in -python-dateutil==2.8.1 - # via - # botocore - # croniter - # flytekit - # great-expectations - # jupyter-client - # kubernetes - # matplotlib - # pandas - # wandb -python-json-logger==2.0.2 - # via flytekit -pytimeparse==1.1.8 - # via flytekit -pytz==2018.4 - # via - # flytekit - # great-expectations - # pandas -pyyaml==5.4.1 - # via - # kubernetes - # wandb -pyzmq==22.3.0 - # via - # jupyter-client - # notebook -regex==2021.9.30 - # via - # black - # docker-image-py -requests==2.26.0 - # via - # flytekit - # great-expectations - # kubernetes - # requests-oauthlib - # responses - # tensorboard - # tensorflow-datasets - # wandb -requests-oauthlib==1.3.0 - # via - # google-auth-oauthlib - # kubernetes -responses==0.14.0 - # via flytekit -retry==0.9.2 - # via flytekit -retrying==1.3.3 - # via sagemaker-training -rsa==4.7.2 +python-dateutil==2.8.2 + # via google-cloud-bigquery +pyyaml==6.0 + # via pre-commit +requests==2.27.1 + # via + # google-api-core + # google-cloud-bigquery +rsa==4.8 # via google-auth -ruamel.yaml==0.17.16 - # via great-expectations -ruamel.yaml.clib==0.2.6 - # via ruamel.yaml -s3transfer==0.5.0 - # via boto3 
-sagemaker-training==3.9.2 - # via flytekitplugins-awssagemaker -scantree==0.0.1 - # via dirhash -scikit-learn==1.0 - # via sklearn -scipy==1.7.1 - # via - # great-expectations - # sagemaker-training - # scikit-learn - # xgboost -send2trash==1.8.0 - # via notebook -sentry-sdk==1.4.3 - # via wandb -shortuuid==1.0.1 - # via wandb -six==1.15.0 +six==1.16.0 # via - # absl-py - # astunparse - # bcrypt - # bleach - # cycler - # docker-pycreds - # flytekit # google-auth - # google-pasta # grpcio - # keras-preprocessing - # kubernetes - # promise - # pynacl # python-dateutil - # responses - # retrying - # sagemaker-training - # scantree - # tensorflow - # tensorflow-datasets - # wandb -sklearn==0.0 - # via - # -r ./case_studies/ml_training/house_price_prediction/requirements.in - # -r ./case_studies/ml_training/pima_diabetes/requirements.in -smmap==4.0.0 - # via gitdb -sortedcontainers==2.4.0 - # via - # flytekit - # hypothesis -statsd==3.3.0 - # via flytekit -subprocess32==3.5.4 - # via wandb -tabulate==0.8.9 - # via - # -r ./case_studies/ml_training/house_price_prediction/requirements.in - # -r ./case_studies/ml_training/pima_diabetes/requirements.in -tensorboard==2.6.0 - # via tensorflow -tensorboard-data-server==0.6.1 - # via tensorboard -tensorboard-plugin-wit==1.8.0 - # via tensorboard -tensorboardx==2.4 - # via - # -r ./integrations/aws/sagemaker_pytorch/requirements.in - # -r ./integrations/kubernetes/kfpytorch/requirements.in -tensorflow==2.6.0 - # via -r ./integrations/aws/sagemaker_training/requirements.in -tensorflow-datasets==4.4.0 - # via -r ./integrations/aws/sagemaker_training/requirements.in -tensorflow-estimator==2.6.0 - # via tensorflow -tensorflow-metadata==1.2.0 - # via tensorflow-datasets -termcolor==1.1.0 - # via - # great-expectations - # tensorflow - # tensorflow-datasets - # yaspin -terminado==0.12.1 - # via notebook -testfixtures==6.18.3 + # virtualenv +testfixtures==6.18.5 # via flake8-isort -testpath==0.5.0 - # via nbconvert -threadpoolctl==3.0.0 - # via scikit-learn toml==0.10.2 + # via pre-commit +tomli==2.0.1 # via # black # flake8-black + # mypy # pytest -toolz==0.11.1 - # via altair -torch==1.8.1 - # via - # -r ./case_studies/ml_training/mnist_classifier/requirements.in - # -r ./integrations/aws/sagemaker_pytorch/requirements.in - # -r ./integrations/kubernetes/kfpytorch/requirements.in - # torchvision -torchvision==0.9.1 - # via - # -r ./case_studies/ml_training/mnist_classifier/requirements.in - # -r ./integrations/aws/sagemaker_pytorch/requirements.in - # -r ./integrations/kubernetes/kfpytorch/requirements.in -tornado==6.1 - # via - # ipykernel - # jupyter-client - # notebook - # terminado -tqdm==4.62.3 - # via - # great-expectations - # tensorflow-datasets -traitlets==5.1.0 - # via - # ipykernel - # ipython - # ipywidgets - # jupyter-client - # jupyter-core - # matplotlib-inline - # nbclient - # nbconvert - # nbformat - # notebook -typed-ast==1.4.3 - # via black -typing-extensions==3.7.4.3 - # via - # gitpython - # tensorflow - # torch - # typing-inspect -typing-inspect==0.7.1 - # via - # dataclasses-json - # pandera -tzlocal==3.0 - # via great-expectations -urllib3==1.26.7 - # via - # botocore - # flytekit - # kubernetes - # requests - # responses - # sentry-sdk -wandb==0.12.4 - # via -r ./case_studies/ml_training/mnist_classifier/requirements.in -wcwidth==0.2.5 - # via prompt-toolkit -webencodings==0.5.1 - # via bleach -websocket-client==1.2.1 - # via kubernetes -werkzeug==2.0.2 - # via - # sagemaker-training - # tensorboard -wheel==0.37.0 
+typing-extensions==4.2.0 # via - # -r ./case_studies/ml_training/house_price_prediction/../../../common/requirements-common.in - # -r ./case_studies/ml_training/mnist_classifier/../../../common/requirements-common.in - # -r ./case_studies/ml_training/pima_diabetes/../../../common/requirements-common.in - # -r ./integrations/aws/sagemaker_pytorch/../../../common/requirements-common.in - # -r ./integrations/aws/sagemaker_training/../../../common/requirements-common.in - # -r ./integrations/external_services/hive/../../../common/requirements-common.in - # -r ./integrations/flytekit_plugins/dolt/../../../common/requirements-common.in - # -r ./integrations/flytekit_plugins/pandera_examples/../../../common/requirements-common.in - # -r ./integrations/kubernetes/k8s_spark/../../../common/requirements-common.in - # -r ./integrations/kubernetes/kfpytorch/../../../common/requirements-common.in - # -r ./integrations/kubernetes/pod/../../../common/requirements-common.in - # astunparse - # flytekit - # tensorboard - # tensorflow -widgetsnbextension==3.5.1 - # via ipywidgets -wrapt==1.12.1 - # via - # deprecated - # flytekit - # pandera - # tensorflow -xgboost==1.4.2 - # via - # -r ./case_studies/ml_training/house_price_prediction/requirements.in - # -r ./case_studies/ml_training/pima_diabetes/requirements.in -yaspin==2.1.0 - # via wandb -zipp==3.6.0 - # via - # importlib-metadata - # importlib-resources -zope.event==4.5.0 - # via gevent -zope.interface==5.4.0 - # via gevent - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools + # black + # mypy +urllib3==1.26.9 + # via requests +virtualenv==20.14.1 + # via pre-commit diff --git a/cookbook/docs/README.md b/cookbook/docs/README.md index 17383f4efa..9726c65b83 100644 --- a/cookbook/docs/README.md +++ b/cookbook/docs/README.md @@ -12,7 +12,7 @@ To make this work, it is essential that the examples are written with comments f - The example directory should have a README.rst. - The example itself should have a header comment, which should have a heading as well. - - Docs interspersed in the example should preceed with `# %%` comment and then + - Docs interspersed in the example should proceed with `# %%` comment and then multiline comments should not have blank spaces between them. 
```rst # %% diff --git a/cookbook/docs/conf.py b/cookbook/docs/conf.py index 94fd73401f..75a3866283 100644 --- a/cookbook/docs/conf.py +++ b/cookbook/docs/conf.py @@ -60,8 +60,7 @@ class CustomSorter(FileNameSortKey): # Type System "flyte_python_types.py", "schema.py", - "structured_dataset.py" - "typed_schema.py", + "structured_dataset.py" "typed_schema.py", "custom_objects.py", "enums.py", "lp_schedules.py", @@ -127,9 +126,7 @@ class CustomSorter(FileNameSortKey): ## GCP # TODO ## External Services - "hive.py" - "snowflake.py" - "bigquery.py" + "hive.py" "snowflake.py" "bigquery.py" # Extending Flyte "backend_plugins.py", # NOTE: for some reason this needs to be listed first here to show up last on the TOC "custom_types.py", @@ -347,7 +344,7 @@ def __call__(self, filename): sphinx_gallery_conf = { "examples_dirs": examples_dirs, "gallery_dirs": gallery_dirs, - "ignore_pattern": f"({'|'.join(ignore_py_files)})\.py", + "ignore_pattern": f"({'|'.join(ignore_py_files)})\.py", # noqa: W605 # "subsection_order": ExplicitOrder( # [ # "../core/basic", diff --git a/cookbook/integrations/aws/batch/batch.py b/cookbook/integrations/aws/batch/batch.py index 8d99d6bbc1..f65406b9ed 100644 --- a/cookbook/integrations/aws/batch/batch.py +++ b/cookbook/integrations/aws/batch/batch.py @@ -6,9 +6,8 @@ that you use to run your jobs, allowing you to focus on analyzing results and solving problems. """ -from flytekitplugins.awsbatch import AWSBatchConfig from flytekit import task, workflow - +from flytekitplugins.awsbatch import AWSBatchConfig # %% # Use this to configure SubmitJobInput for a AWS batch job. Task's marked with this will automatically execute diff --git a/cookbook/integrations/aws/sagemaker_pytorch/sagemaker_pytorch_distributed_training.py b/cookbook/integrations/aws/sagemaker_pytorch/sagemaker_pytorch_distributed_training.py index b2fb481eca..5f2c8af960 100644 --- a/cookbook/integrations/aws/sagemaker_pytorch/sagemaker_pytorch_distributed_training.py +++ b/cookbook/integrations/aws/sagemaker_pytorch/sagemaker_pytorch_distributed_training.py @@ -353,7 +353,9 @@ def download_test_data(training_dir): input_content_type=InputContentType.TEXT_CSV, ), training_job_resource_config=TrainingJobResourceConfig( - instance_type="ml.p3.8xlarge", instance_count=2, volume_size_in_gb=25, + instance_type="ml.p3.8xlarge", + instance_count=2, + volume_size_in_gb=25, ), ), cache_version="1.0", diff --git a/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py b/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py index 6e218321ea..bd672ee62a 100644 --- a/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py +++ b/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py @@ -79,7 +79,9 @@ task_config=SagemakerTrainingJobConfig( algorithm_specification=alg_spec, training_job_resource_config=TrainingJobResourceConfig( - instance_type="ml.m4.xlarge", instance_count=1, volume_size_in_gb=25, + instance_type="ml.m4.xlarge", + instance_count=1, + volume_size_in_gb=25, ), ), metadata=TaskMetadata(cache_version="1.0", cache=True), @@ -106,7 +108,9 @@ # and split it and uploaded to an s3 bucket: def execute_training(): xgboost_train_task( - static_hyperparameters=xgboost_hyperparameters, train="", validation="", + static_hyperparameters=xgboost_hyperparameters, + train="", + validation="", ) diff --git a/cookbook/integrations/aws/sagemaker_training/sagemaker_custom_training.py 
b/cookbook/integrations/aws/sagemaker_training/sagemaker_custom_training.py index 7edf57b706..74114a825f 100644 --- a/cookbook/integrations/aws/sagemaker_training/sagemaker_custom_training.py +++ b/cookbook/integrations/aws/sagemaker_training/sagemaker_custom_training.py @@ -5,13 +5,13 @@ with very few modifications. """ import typing +from typing import Tuple import matplotlib.pyplot as plt import tensorflow as tf import tensorflow_datasets as tfds from flytekit import task, workflow from flytekit.types.directory import TensorboardLogs -from typing import Tuple # %% # Training Algorithm @@ -85,7 +85,9 @@ def normalize_img(image, label): input_content_type=InputContentType.TEXT_CSV, ), training_job_resource_config=TrainingJobResourceConfig( - instance_type="ml.m4.xlarge", instance_count=1, volume_size_in_gb=25, + instance_type="ml.m4.xlarge", + instance_count=1, + volume_size_in_gb=25, ), ), cache_version="1.0", @@ -132,7 +134,10 @@ def custom_training_task(epochs: int, batch_size: int) -> TrainingOutputs: tb_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir) history = model.fit( - ds_train, epochs=epochs, validation_data=ds_test, callbacks=[tb_callback], + ds_train, + epochs=epochs, + validation_data=ds_test, + callbacks=[tb_callback], ) serialized_model = "my_model.h5" @@ -194,9 +199,9 @@ def mnist_trainer( # %% # As long as you have tensorflow setup locally, it will run like a regular python script. if __name__ == "__main__": - model, accurracy, loss, logs = mnist_trainer() + model, accuracy, loss, logs = mnist_trainer() print( - f"Model: {model}, Accuracy PNG: {accurracy}, loss PNG: {loss}, Tensorboard Log Dir: {logs}" + f"Model: {model}, Accuracy PNG: {accuracy}, loss PNG: {loss}, Tensorboard Log Dir: {logs}" ) # %% @@ -220,6 +225,6 @@ def mnist_trainer( # # # If running remotely (executing on Flyte hosted environment), the workflow execution outputs can be retrieved. -# +# # You can retrieve the outputs - which will be a path to a blob store like S3, GCS, minio, etc. Tensorboad can be # pointed to on your local laptop to visualize the results. diff --git a/cookbook/integrations/external_services/snowflake/snowflake.py b/cookbook/integrations/external_services/snowflake/snowflake.py index 23c788032c..8bb0fde84e 100644 --- a/cookbook/integrations/external_services/snowflake/snowflake.py +++ b/cookbook/integrations/external_services/snowflake/snowflake.py @@ -5,9 +5,9 @@ This example shows how to use a Flyte SnowflakeTask to execute a query. """ -from flytekit import kwtypes, task, workflow -from flytekit.types.schema import FlyteSchema +from flytekit import kwtypes, workflow from flytekitplugins.snowflake import SnowflakeConfig, SnowflakeTask + # %% # This is the world's simplest query. Note that in order for registration to work properly, you'll need to give your # Snowflake task a name that's unique across your project/domain for your Flyte installation. @@ -16,8 +16,12 @@ inputs={}, query_template="SELECT 1", output_schema_type=None, - task_config=SnowflakeConfig(account="ha63105.us-central1.gcp", database="SNOWFLAKE_SAMPLE_DATA", - schema="TPCH_SF1000", warehouse="COMPUTE_WH"), + task_config=SnowflakeConfig( + account="ha63105.us-central1.gcp", + database="SNOWFLAKE_SAMPLE_DATA", + schema="TPCH_SF1000", + warehouse="COMPUTE_WH", + ), ) @@ -57,8 +61,12 @@ def no_io_wf(): name="sql.snowflake.w_io", # Define inputs as well as their types that can be used to customize the query. 
inputs=kwtypes(nation_key=int), - task_config=SnowflakeConfig(account="ha63105.us-central1.gcp", database="SNOWFLAKE_SAMPLE_DATA", - schema="TPCH_SF1000", warehouse="COMPUTE_WH"), + task_config=SnowflakeConfig( + account="ha63105.us-central1.gcp", + database="SNOWFLAKE_SAMPLE_DATA", + schema="TPCH_SF1000", + warehouse="COMPUTE_WH", + ), query_template="SELECT * from CUSTOMER where C_NATIONKEY = {{ .inputs.nation_key }} limit 100", ) @@ -67,6 +75,7 @@ def no_io_wf(): def full_snowflake_wf(nation_key: int): return snowflake_task_templatized_query(nation_key=nation_key) + # %% # Check query result on snowflake console: ``https://.snowflakecomputing.com/console#/monitoring/queries/detail`` # diff --git a/cookbook/integrations/flytekit_plugins/dolt/dolt_branch_example.py b/cookbook/integrations/flytekit_plugins/dolt/dolt_branch_example.py index be785c77dd..c75db662fc 100644 --- a/cookbook/integrations/flytekit_plugins/dolt/dolt_branch_example.py +++ b/cookbook/integrations/flytekit_plugins/dolt/dolt_branch_example.py @@ -9,11 +9,10 @@ import sys import typing +import pandas as pd from dolt_integrations.core import NewBranch -from flytekitplugins.dolt.schema import DoltConfig, DoltTable from flytekit import task, workflow -import pandas as pd - +from flytekitplugins.dolt.schema import DoltConfig, DoltTable # %% # A Simple Workflow @@ -34,11 +33,10 @@ doltdb_path = os.path.join(os.path.dirname(__file__), "foo") + def generate_confs(a: int) -> typing.Tuple[DoltConfig, DoltConfig, DoltConfig]: users_conf = DoltConfig( - db_path=doltdb_path, - tablename="users", - branch_conf=NewBranch(f"run/a_is_{a}") + db_path=doltdb_path, tablename="users", branch_conf=NewBranch(f"run/a_is_{a}") ) query_users = DoltTable( @@ -57,6 +55,7 @@ def generate_confs(a: int) -> typing.Tuple[DoltConfig, DoltConfig, DoltConfig]: return users_conf, query_users, big_users_conf + # %% # .. tip :: # A ``DoltTable`` is an extension of ``DoltConfig`` that wraps a ``pandas.DataFrame`` -- accessible via the ``DoltTable.data`` @@ -74,33 +73,43 @@ def generate_confs(a: int) -> typing.Tuple[DoltConfig, DoltConfig, DoltConfig]: # Return types of ``DoltTable`` save the ``data`` to the # Dolt database given a connection configuration. 
+ @task def get_confs(a: int) -> typing.Tuple[DoltConfig, DoltTable, DoltConfig]: return generate_confs(a) + @task def populate_users(a: int, conf: DoltConfig) -> DoltTable: - users = [("George", a), ("Alice", a*2), ("Stephanie", a*3)] + users = [("George", a), ("Alice", a * 2), ("Stephanie", a * 3)] df = pd.DataFrame(users, columns=["name", "count"]) return DoltTable(data=df, config=conf) + @task -def filter_users(a: int, all_users: DoltTable, filtered_users: DoltTable, conf: DoltConfig) -> DoltTable: +def filter_users( + a: int, all_users: DoltTable, filtered_users: DoltTable, conf: DoltConfig +) -> DoltTable: usernames = filtered_users.data[["name"]] return DoltTable(data=usernames, config=conf) + @task def count_users(users: DoltTable) -> int: return users.data.shape[0] + @workflow def wf(a: int) -> int: user_conf, query_conf, big_user_conf = get_confs(a=a) users = populate_users(a=a, conf=user_conf) - big_users = filter_users(a=a, all_users=users, filtered_users=query_conf, conf=big_user_conf) + big_users = filter_users( + a=a, all_users=users, filtered_users=query_conf, conf=big_user_conf + ) big_user_cnt = count_users(users=big_users) return big_user_cnt + if __name__ == "__main__": print(f"Running {__file__} main...") if len(sys.argv) != 2: @@ -109,9 +118,9 @@ def wf(a: int) -> int: result = wf(a=a) print(f"Running wf(), returns int\n{result}\n{type(result)}") -# %% +# %% # We will run this workflow twice: -# +# # .. prompt:: $ # # python branch_example.py 2 @@ -125,4 +134,4 @@ def wf(a: int) -> int: # .. prompt:: $ # # cd foo -# dolt branch \ No newline at end of file +# dolt branch diff --git a/cookbook/integrations/flytekit_plugins/dolt/dolt_quickstart_example.py b/cookbook/integrations/flytekit_plugins/dolt/dolt_quickstart_example.py index 88acc51393..2f0e2ee465 100644 --- a/cookbook/integrations/flytekit_plugins/dolt/dolt_quickstart_example.py +++ b/cookbook/integrations/flytekit_plugins/dolt/dolt_quickstart_example.py @@ -12,9 +12,9 @@ import os import sys -from flytekitplugins.dolt.schema import DoltConfig, DoltTable -from flytekit import task, workflow import pandas as pd +from flytekit import task, workflow +from flytekitplugins.dolt.schema import DoltConfig, DoltTable # %% # Next, we initialize Dolt's config. @@ -26,19 +26,23 @@ ) # %% -# We define a task to create a DataFrame and store the table in Dolt. +# We define a task to create a DataFrame and store the table in Dolt. + + @task def populate_rabbits(a: int) -> DoltTable: rabbits = [("George", a), ("Alice", a * 2), ("Sugar Maple", a * 3)] df = pd.DataFrame(rabbits, columns=["name", "count"]) return DoltTable(data=df, config=rabbits_conf) + # %% # ``unwrap_rabbits`` task does the exact opposite -- reading the table from Dolt and returning a DataFrame. @task def unwrap_rabbits(table: DoltTable) -> pd.DataFrame: return table.data + # %% # Our workflow combines the above two tasks: @workflow @@ -47,6 +51,7 @@ def wf(a: int) -> pd.DataFrame: df = unwrap_rabbits(table=rabbits) return df + if __name__ == "__main__": print(f"Running {__file__} main...") if len(sys.argv) != 2: diff --git a/cookbook/integrations/flytekit_plugins/greatexpectations/type_example.py b/cookbook/integrations/flytekit_plugins/greatexpectations/type_example.py index a13dea0e08..5f2f2331a3 100644 --- a/cookbook/integrations/flytekit_plugins/greatexpectations/type_example.py +++ b/cookbook/integrations/flytekit_plugins/greatexpectations/type_example.py @@ -14,17 +14,12 @@ # %% # First, let's import the required libraries. 
import os -import typing import pandas as pd from flytekit import Resources, task, workflow from flytekit.types.file import CSVFile from flytekit.types.schema import FlyteSchema -from flytekitplugins.great_expectations import ( - BatchRequestConfig, - GreatExpectationsFlyteConfig, - GreatExpectationsType, -) +from flytekitplugins.great_expectations import BatchRequestConfig, GreatExpectationsFlyteConfig, GreatExpectationsType # %% # .. note:: @@ -46,21 +41,23 @@ # The directory that's being used is defined in ``my_assets``. You can find ``my_assets`` in the Great Expectations config file. # # The parameters within the ``data_connector_query`` convey that we're fetching all those files that have "2019" and "01" in the file names. + + @task(limits=Resources(mem="500Mi")) def simple_task( directory: GreatExpectationsType[ str, GreatExpectationsFlyteConfig( - datasource_name="data", - expectation_suite_name="test.demo", - data_connector_name="my_data_connector", + datasource_name="data", # noqa: F821 + expectation_suite_name="test.demo", # noqa: F821 + data_connector_name="my_data_connector", # noqa: F821 batch_request_config=BatchRequestConfig( data_connector_query={ - "batch_filter_parameters": { - "year": "2019", - "month": "01", # noqa: F722 + "batch_filter_parameters": { # noqa: F821 + "year": "2019", # noqa: F821 + "month": "01", # noqa: F821, F722 }, - "limit": 10, + "limit": 10, # noqa: F821 }, ), context_root_dir=CONTEXT_ROOT_DIR, @@ -97,6 +94,8 @@ def simple_wf(directory: str = "my_assets") -> str: # # The first value that's being sent within ``GreatExpectationsType`` is ``CSVFile`` (this is a pre-formatted FlyteFile type). # This means that we want to validate the ``FlyteFile`` data. + + @task(limits=Resources(mem="500Mi")) def file_task( dataset: GreatExpectationsType[CSVFile, great_expectations_config] @@ -122,12 +121,14 @@ def file_wf() -> pd.DataFrame: @task(limits=Resources(mem="500Mi")) def schema_task( dataframe: GreatExpectationsType[ - FlyteSchema, - GreatExpectationsFlyteConfig( - datasource_name="data", - expectation_suite_name="test.demo", - data_connector_name="data_flytetype_data_connector", - batch_request_config=BatchRequestConfig(data_connector_query={"limit": 10}), + FlyteSchema, # noqa: F821 + GreatExpectationsFlyteConfig( # noqa: F821 + datasource_name="data", # noqa: F821 + expectation_suite_name="test.demo", # noqa: F821 + data_connector_name="data_flytetype_data_connector", # noqa: F821 + batch_request_config=BatchRequestConfig( + data_connector_query={"limit": 10} # noqa : F841 + ), # noqa: F821 local_file_path="/tmp/test.parquet", # noqa: F722 context_root_dir=CONTEXT_ROOT_DIR, ), diff --git a/cookbook/integrations/flytekit_plugins/modin_examples/knn_classifier.py b/cookbook/integrations/flytekit_plugins/modin_examples/knn_classifier.py index e4875fe2ae..3aba9de846 100644 --- a/cookbook/integrations/flytekit_plugins/modin_examples/knn_classifier.py +++ b/cookbook/integrations/flytekit_plugins/modin_examples/knn_classifier.py @@ -25,14 +25,14 @@ # Let's import the necessary dependencies. 
from typing import List, NamedTuple -import flytekitplugins.modin +import flytekitplugins.modin # noqa: F401 import modin.pandas import ray from flytekit import task, workflow from sklearn.datasets import load_wine -from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier ray.shutdown() # close previous instance of ray (if any) ray.init(num_cpus=2) # open a new instance of ray diff --git a/cookbook/integrations/flytekit_plugins/pandera_examples/basic_schema_example.py b/cookbook/integrations/flytekit_plugins/pandera_examples/basic_schema_example.py index 16d5c090ad..29eeb24e35 100644 --- a/cookbook/integrations/flytekit_plugins/pandera_examples/basic_schema_example.py +++ b/cookbook/integrations/flytekit_plugins/pandera_examples/basic_schema_example.py @@ -9,28 +9,31 @@ import typing -import flytekitplugins.pandera +import flytekitplugins.pandera # noqa : F401 import pandas as pd import pandera as pa from flytekit import task, workflow from pandera.typing import DataFrame, Series - # %% # A Simple Data Processing Pipeline # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # Let's first define a simple data processing pipeline in pure python. + def total_pay(df): return df.assign(total_pay=df.hourly_pay * df.hours_worked) + def add_id(df, worker_id): return df.assign(worker_id=worker_id) + def process_data(df, worker_id): return add_id(df=total_pay(df=df), worker_id=worker_id) + # %% # As you can see, the ``process_data`` function is composed of two simpler functions: # One that computes ``total_pay`` and another that simply adds an ``id`` column to @@ -43,6 +46,7 @@ def process_data(df, worker_id): # Next we define the schemas that provide type and statistical annotations # for the raw, intermediate, and final outputs of our pipeline. + class InSchema(pa.SchemaModel): hourly_pay: Series[float] = pa.Field(ge=7) hours_worked: Series[float] = pa.Field(ge=10) @@ -55,6 +59,7 @@ def check_numbers_are_positive(cls, series: Series) -> Series[bool]: class Config: coerce = True + class IntermediateSchema(InSchema): total_pay: Series[float] @@ -63,9 +68,11 @@ def check_total_pay(cls, df: DataFrame) -> Series[bool]: """Defines a dataframe-level custom check.""" return df["total_pay"] == df["hourly_pay"] * df["hours_worked"] + class OutSchema(IntermediateSchema): worker_id: Series[str] = pa.Field() + # %% # Columns are specified as class attributes with a specified data type using the # type-hinting syntax, and you can place additional statistical constraints on the @@ -74,7 +81,7 @@ class OutSchema(IntermediateSchema): # :py:func:`~pandera.model_components.dataframe_check` (dataframe-level checks), which automatically make them # class methods. # -# Pandera uses inheritence to make sure that :py:class:`~pandera.model.SchemaModel` subclasses contain +# Pandera uses inheritance to make sure that :py:class:`~pandera.model.SchemaModel` subclasses contain # all of the same columns and custom check methods as their base class. Inheritance semantics # apply to schema models so you can override column attributes or check methods in subclasses. 
This has # the nice effect of providing an explicit graph of type dependencies as data @@ -89,32 +96,41 @@ class OutSchema(IntermediateSchema): # by decorating our functions with the :py:func:`~flytekit.task` and :py:func:`~flytekit.workflow` decorators and # annotating the inputs and outputs of those functions with the pandera schemas: + @task def dict_to_dataframe(data: dict) -> DataFrame[InSchema]: """Helper task to convert a dictionary input to a dataframe.""" return pd.DataFrame(data) + @task -def total_pay(df: DataFrame[InSchema]) -> DataFrame[IntermediateSchema]: +def total_pay(df: DataFrame[InSchema]) -> DataFrame[IntermediateSchema]: # noqa : F811 return df.assign(total_pay=df.hourly_pay * df.hours_worked) + @task -def add_ids(df: DataFrame[IntermediateSchema], worker_ids: typing.List[str]) -> DataFrame[OutSchema]: +def add_ids( + df: DataFrame[IntermediateSchema], worker_ids: typing.List[str] +) -> DataFrame[OutSchema]: return df.assign(worker_id=worker_ids) + @workflow -def process_data( - data: dict = {"hourly_pay": [12.0, 13.5, 10.1], "hours_worked": [30.5, 40.0, 41.75]}, - worker_ids: typing.List[str] = ["a", "b", "c"] +def process_data( # noqa : F811 + data: dict = { + "hourly_pay": [12.0, 13.5, 10.1], + "hours_worked": [30.5, 40.0, 41.75], + }, + worker_ids: typing.List[str] = ["a", "b", "c"], ) -> DataFrame[OutSchema]: - return add_ids( - df=total_pay(df=dict_to_dataframe(data=data)), worker_ids=worker_ids - ) + return add_ids(df=total_pay(df=dict_to_dataframe(data=data)), worker_ids=worker_ids) + if __name__ == "__main__": print(f"Running {__file__} main...") result = process_data( - data={"hourly_pay": [12.0, 13.5, 10.1], "hours_worked": [30.5, 40.0, 41.75]}, worker_ids=["a", "b", "c"] + data={"hourly_pay": [12.0, 13.5, 10.1], "hours_worked": [30.5, 40.0, 41.75]}, + worker_ids=["a", "b", "c"], ) print(f"Running wf(), returns dataframe\n{result}\n{result.dtypes}") diff --git a/cookbook/integrations/flytekit_plugins/pandera_examples/validating_and_testing_ml_pipelines.py b/cookbook/integrations/flytekit_plugins/pandera_examples/validating_and_testing_ml_pipelines.py index f9b8467f16..00fc81c52e 100644 --- a/cookbook/integrations/flytekit_plugins/pandera_examples/validating_and_testing_ml_pipelines.py +++ b/cookbook/integrations/flytekit_plugins/pandera_examples/validating_and_testing_ml_pipelines.py @@ -38,21 +38,19 @@ import typing +import flytekitplugins.pandera # noqa: F401 import joblib import pandas as pd import pandera as pa from flytekit import task, workflow from flytekit.types.file import JoblibSerializedFile -from pandera.typing import DataFrame, Series, Index +from pandera.typing import DataFrame, Index, Series # noqa: F401 from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score - # %% # We also need to import the ``pandera`` flytekit plugin to enable dataframe runtime type-checking: -import flytekitplugins.pandera - # %% # The Dataset: UCI Heart Disease @@ -96,7 +94,7 @@ # * - ``ca`` # - number of major vessels (0-3) colored by flourosopy # * - ``thal`` -# - 3 = normal; 6 = fixed defect; 7 = reversable defect +# - 3 = normal; 6 = fixed defect; 7 = reversible defect # * - ``target`` # - the predicted attribute # @@ -148,7 +146,7 @@ class RawData(pa.SchemaModel): isin=[ 3, # normal 6, # fixed defect - 7, # reversable defect + 7, # reversible defect ] ) target: Series[int] = pa.Field(ge=0, le=4) @@ -195,7 +193,7 @@ def fetch_raw_data() -> DataFrame[RawData]: # having values ranging from ``0 - 4``` to a binary representation 
where ``0`` represents absence of heart disease and # ``1`` represents presence of heart disease. # -# Here we can use inheritence to define a ``ParsedData`` schema by overriding just the ``target`` attribute: +# Here we can use inheritance to define a ``ParsedData`` schema by overriding just the ``target`` attribute: class ParsedData(RawData): @@ -220,7 +218,6 @@ def parse_raw_data(raw_data: DataFrame[RawData]) -> DataFrame[ParsedData]: # Now it's time to split the data into a training and test set. Here we'll showcase the utility of # :ref:`named outputs ` combined with pandera schemas. -import typing DataSplits = typing.NamedTuple( "DataSplits", training_set=DataFrame[ParsedData], test_set=DataFrame[ParsedData] @@ -330,7 +327,7 @@ class NegativeExamples(ParsedData): @given( PositiveExamples.strategy(size=5), NegativeExamples.strategy(size=5), - st.integers(min_value=0, max_value=2 ** 32), + st.integers(min_value=0, max_value=2**32), ) @hypothesis.settings( deadline=1000, diff --git a/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py b/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py index 5c0ba4f9c6..c176bac7ef 100644 --- a/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py +++ b/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py @@ -19,12 +19,10 @@ # %% # Let's first import the libraries. -import pandas from flytekit import kwtypes, task, workflow from flytekit.types.schema import FlyteSchema from flytekitplugins.sqlalchemy import SQLAlchemyConfig, SQLAlchemyTask - # %% # First we define a ``SQLALchemyTask``, which returns the first ``n`` records from the ``rna`` table of the # `RNA central database `__ . Since this database is public, we can @@ -39,7 +37,9 @@ # **Never** store passwords for proprietary or sensitive databases! If you need to store and access secrets in a task, # Flyte provides a convenient API. See :ref:`sphx_glr_auto_core_containerization_use_secrets.py` for more details. -DATABASE_URI = "postgresql://reader:NWDMCE5xdipIjRrp@hh-pgsql-public.ebi.ac.uk:5432/pfmegrnargs" +DATABASE_URI = ( + "postgresql://reader:NWDMCE5xdipIjRrp@hh-pgsql-public.ebi.ac.uk:5432/pfmegrnargs" +) # Here we define the schema of the expected output of the query, which we then re-use in the `get_mean_length` task. DataSchema = FlyteSchema[kwtypes(sequence_length=int)] @@ -71,7 +71,9 @@ def get_mean_length(data: DataSchema) -> float: # Finally, we put everything together into a workflow: @workflow def my_wf(min_length: int, max_length: int, limit: int) -> float: - return get_mean_length(data=sql_task(min_length=min_length, max_length=max_length, limit=limit)) + return get_mean_length( + data=sql_task(min_length=min_length, max_length=max_length, limit=limit) + ) if __name__ == "__main__": diff --git a/cookbook/integrations/kubernetes/k8s_spark/README.rst b/cookbook/integrations/kubernetes/k8s_spark/README.rst index 0989f18a02..c183241616 100644 --- a/cookbook/integrations/kubernetes/k8s_spark/README.rst +++ b/cookbook/integrations/kubernetes/k8s_spark/README.rst @@ -74,7 +74,7 @@ Step 2: Environment Setup #. Build Spark image correctly as explained in :ref:`spark-docker-image`. -#. Enable Spark plugin for Flyte refering to the :ref:`spark-examples` section. Additionally, Flyte uses the SparkOperator to run Spark Jobs and separate K8s Service Account/Role per namespace, which are created as part of the standard Flyte deployment. +#. Enable Spark plugin for Flyte referring to the :ref:`spark-examples` section. 
Additionally, Flyte uses the SparkOperator to run Spark Jobs and separate K8s Service Account/Role per namespace, which are created as part of the standard Flyte deployment. #. Ensure you have enough resources on your K8s cluster. Based on the resources required for your Spark job (across drivers/executors), you may have to tweak resource quotas for the namespace. diff --git a/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py b/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py index ab58fe1c7c..e51a6343bd 100644 --- a/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py +++ b/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py @@ -113,7 +113,7 @@ def hello_spark(partitions: int) -> float: def f(_): x = random.random() * 2 - 1 y = random.random() * 2 - 1 - return 1 if x ** 2 + y ** 2 <= 1 else 0 + return 1 if x**2 + y**2 <= 1 else 0 # %% diff --git a/cookbook/integrations/kubernetes/kfmpi/mpi_mnist.py b/cookbook/integrations/kubernetes/kfmpi/mpi_mnist.py index 4e1caa1df7..6812022849 100644 --- a/cookbook/integrations/kubernetes/kfmpi/mpi_mnist.py +++ b/cookbook/integrations/kubernetes/kfmpi/mpi_mnist.py @@ -11,13 +11,14 @@ import pathlib import flytekit -import tensorflow as tf import horovod.tensorflow as hvd -from flytekit import task, workflow, Resources -from flytekit.types.directory import FlyteDirectory +import tensorflow as tf +from flytekit import Resources, task, workflow from flytekit.core.base_task import IgnoreOutputs +from flytekit.types.directory import FlyteDirectory from flytekitplugins.kfmpi import MPIJob + # %% # We define a training step that will be called from the training loop. # This step captures the training loss and updates the model weights through gradients. @@ -47,6 +48,7 @@ def training_step(images, labels, first_batch, mnist_model, loss, opt): return loss_value + # %% # We define an MPIJob-enabled task. The configuration given in the MPIJob constructor will be used to set up the distributed training environment. # @@ -66,10 +68,12 @@ def training_step(images, labels, first_batch, mnist_model, loss, opt): retries=3, cache=True, cache_version="0.1", - requests=Resources(cpu='1', mem="600Mi"), - limits=Resources(cpu='2'), + requests=Resources(cpu="1", mem="600Mi"), + limits=Resources(cpu="2"), ) -def horovod_train_task(batch_size: int, buffer_size: int, dataset_size: int) -> FlyteDirectory: +def horovod_train_task( + batch_size: int, buffer_size: int, dataset_size: int +) -> FlyteDirectory: """ :param batch_size: Represents the number of consecutive elements of this dataset to combine in a single batch. :param buffer_size: Defines the size of the buffer used to hold elements of the dataset used for training. 
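(Reader's note on the MPI hunks above: the `MPIJob(...)` constructor arguments themselves sit in unchanged context lines and are not visible in this patch. The sketch below shows what such a configuration typically looks like; the worker/launcher/slot values and the stub task body are illustrative assumptions, not values taken from `mpi_mnist.py`.)

```python
from flytekit import Resources, task
from flytekitplugins.kfmpi import MPIJob


# A minimal sketch of an MPIJob-enabled task. The num_workers /
# num_launcher_replicas / slots values are assumed for illustration only.
@task(
    task_config=MPIJob(
        num_workers=2,            # assumed worker replica count
        num_launcher_replicas=1,  # assumed launcher replica count
        slots=1,                  # assumed slots per worker
    ),
    retries=3,
    requests=Resources(cpu="1", mem="600Mi"),
    limits=Resources(cpu="2"),
)
def sketch_horovod_task(batch_size: int) -> int:
    # The real task (horovod_train_task above) initializes Horovod, trains the
    # Keras model, and returns a FlyteDirectory of checkpoints; this stub only
    # shows how an MPIJob config attaches to a task.
    return batch_size
```

On a cluster with the MPI operator installed, Flyte should materialize the launcher and worker replicas from this config; run locally, the task body executes as a plain Python function.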
@@ -78,25 +82,30 @@ def horovod_train_task(batch_size: int, buffer_size: int, dataset_size: int) -> """ hvd.init() - (mnist_images, mnist_labels), _ = \ - tf.keras.datasets.mnist.load_data(path='mnist-%d.npz' % hvd.rank()) + (mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data( + path="mnist-%d.npz" % hvd.rank() + ) dataset = tf.data.Dataset.from_tensor_slices( - (tf.cast(mnist_images[..., tf.newaxis] / 255.0, tf.float32), - tf.cast(mnist_labels, tf.int64)) + ( + tf.cast(mnist_images[..., tf.newaxis] / 255.0, tf.float32), + tf.cast(mnist_labels, tf.int64), + ) ) dataset = dataset.repeat().shuffle(buffer_size).batch(batch_size) - mnist_model = tf.keras.Sequential([ - tf.keras.layers.Conv2D(32, [3, 3], activation='relu'), - tf.keras.layers.Conv2D(64, [3, 3], activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Dropout(0.25), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(128, activation='relu'), - tf.keras.layers.Dropout(0.5), - tf.keras.layers.Dense(10, activation='softmax') - ]) + mnist_model = tf.keras.Sequential( + [ + tf.keras.layers.Conv2D(32, [3, 3], activation="relu"), + tf.keras.layers.Conv2D(64, [3, 3], activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Dropout(0.25), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(10, activation="softmax"), + ] + ) loss = tf.losses.SparseCategoricalCrossentropy() # Horovod: adjust learning rate based on number of GPUs. @@ -112,7 +121,7 @@ def horovod_train_task(batch_size: int, buffer_size: int, dataset_size: int) -> loss_value = training_step(images, labels, batch == 0, mnist_model, loss, opt) if batch % 10 == 0 and hvd.local_rank() == 0: - print('Step #%d\tLoss: %.6f' % (batch, loss_value)) + print("Step #%d\tLoss: %.6f" % (batch, loss_value)) if hvd.rank() != 0: raise IgnoreOutputs("I am not rank 0") @@ -122,22 +131,34 @@ def horovod_train_task(batch_size: int, buffer_size: int, dataset_size: int) -> checkpoint.save(checkpoint_prefix) tf.keras.models.save_model( - mnist_model, str(working_dir), overwrite=True, include_optimizer=True, save_format=None, - signatures=None, options=None, save_traces=True + mnist_model, + str(working_dir), + overwrite=True, + include_optimizer=True, + save_format=None, + signatures=None, + options=None, + save_traces=True, ) return FlyteDirectory(path=str(working_dir)) + # %% # Lastly, we can call the workflow and run the example. @workflow -def horovod_training_wf(batch_size: int = 128, buffer_size: int = 10000, dataset_size: int = 10000) -> FlyteDirectory: +def horovod_training_wf( + batch_size: int = 128, buffer_size: int = 10000, dataset_size: int = 10000 +) -> FlyteDirectory: """ :param batch_size: Represents the number of consecutive elements of this dataset to combine in a single batch. :param buffer_size: Defines the size of the buffer used to hold elements of the dataset used for training. :param dataset_size: The number of elements of this dataset that should be taken to form the new dataset when running batched training. 
""" - return horovod_train_task(batch_size=batch_size, buffer_size=buffer_size, dataset_size=dataset_size) + return horovod_train_task( + batch_size=batch_size, buffer_size=buffer_size, dataset_size=dataset_size + ) + if __name__ == "__main__": model, plot, logs = horovod_training_wf() diff --git a/cookbook/integrations/kubernetes/kfpytorch/pytorch_mnist.py b/cookbook/integrations/kubernetes/kfpytorch/pytorch_mnist.py index 445c37ba81..403ca25c80 100644 --- a/cookbook/integrations/kubernetes/kfpytorch/pytorch_mnist.py +++ b/cookbook/integrations/kubernetes/kfpytorch/pytorch_mnist.py @@ -9,6 +9,7 @@ import os import typing from dataclasses import dataclass +from typing import Tuple import matplotlib.pyplot as plt import torch @@ -22,7 +23,6 @@ from torch import distributed as dist from torch import nn, optim from torchvision import datasets, transforms -from typing import Tuple WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1)) @@ -186,7 +186,7 @@ class Hyperparameters(object): cache=True, cache_version="1.0", requests=Resources(cpu=cpu_request, mem=mem_request, gpu=gpu_request), - limits=Resources(mem=mem_limit, gpu=gpu_limit) + limits=Resources(mem=mem_limit, gpu=gpu_limit), ) def mnist_pytorch_job(hp: Hyperparameters) -> TrainingOutputs: log_dir = "logs" diff --git a/cookbook/integrations/kubernetes/kftensorflow/tf_mnist.py b/cookbook/integrations/kubernetes/kftensorflow/tf_mnist.py index a5a9b8c613..a9b15429ce 100644 --- a/cookbook/integrations/kubernetes/kftensorflow/tf_mnist.py +++ b/cookbook/integrations/kubernetes/kftensorflow/tf_mnist.py @@ -14,16 +14,15 @@ # %% # First, we load the libraries. import os +from dataclasses import dataclass +from typing import NamedTuple, Tuple -from flytekitplugins.kftensorflow import TfJob -from flytekit import task, workflow, Resources - -import tensorflow_datasets as tfds import tensorflow as tf -from typing import NamedTuple, Tuple -from flytekit.types.directory import FlyteDirectory -from dataclasses import dataclass +import tensorflow_datasets as tfds from dataclasses_json import dataclass_json +from flytekit import Resources, task, workflow +from flytekit.types.directory import FlyteDirectory +from flytekitplugins.kftensorflow import TfJob # %% # We define ``MODEL_FILE_PATH`` indicating where to store the model file. @@ -31,6 +30,8 @@ # %% # We initialize a data class to store the hyperparameters. + + @dataclass_json @dataclass class Hyperparameters(object): diff --git a/cookbook/integrations/kubernetes/pod/pod.py b/cookbook/integrations/kubernetes/pod/pod.py index fef6786abc..e7b1a5c289 100644 --- a/cookbook/integrations/kubernetes/pod/pod.py +++ b/cookbook/integrations/kubernetes/pod/pod.py @@ -38,7 +38,7 @@ import time from typing import List -from flytekit import task, workflow, Resources +from flytekit import Resources, TaskMetadata, dynamic, map_task, task, workflow from flytekitplugins.pod import Pod from kubernetes.client.models import ( V1Container, @@ -53,6 +53,8 @@ # %% # We define a simple pod spec with two containers. + + def generate_pod_spec_for_task(): # Primary containers do not require us to specify an image, the default image built for Flyte tasks will get used. @@ -126,7 +128,6 @@ def pod_workflow() -> str: # # To use pod task as part of map task, we send pod task definition to :py:func:`~flytekit:flytekit.map_task`. # This will run pod task across a collection of inputs. 
-from flytekit import map_task, TaskMetadata @task( @@ -151,7 +152,7 @@ def pod_workflow() -> str: limits={"cpu": ".5", "memory": "500Mi"}, ), ) - ] + ], ), primary_container_name="primary", ) @@ -180,7 +181,6 @@ def my_map_workflow(a: List[int]) -> str: # ==================== # # To use pod task a dynamic task, simply pass the pod task config to an annotated dynamic task. -from flytekit import dynamic @task @@ -209,7 +209,7 @@ def my_dynamic_pod_task(val: int) -> str: @workflow -def my_dynamic_pod_task_workflow(val: int=6) -> str: +def my_dynamic_pod_task_workflow(val: int = 6) -> str: s = my_dynamic_pod_task(val=val) return s diff --git a/cookbook/larger_apps/larger_apps_iterate.py b/cookbook/larger_apps/larger_apps_iterate.py index f55d30fa52..a300040a9e 100644 --- a/cookbook/larger_apps/larger_apps_iterate.py +++ b/cookbook/larger_apps/larger_apps_iterate.py @@ -163,7 +163,7 @@ def my_wf(message: str) -> str: Finally, you can execute the updated workflow programmatically with ``flytectl``. To pass arguments to the workflow, update the execution spec file that we previously generated in the -:ref:`Deploying to the Coud ` step. +:ref:`Deploying to the Cloud ` step. Generate an execution spec file. This will prompt you to overwrite and answer 'y' on it.
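To round off the pod-plugin changes above with a usage view: the pattern `pod.py` wires a `Pod` spec into is flytekit's `map_task`, which fans a single task out over a list of inputs. A minimal, self-contained sketch follows; the task body and default inputs are illustrative stand-ins, not taken from this patch.

```python
from typing import List

from flytekit import TaskMetadata, map_task, task, workflow


@task
def double(x: int) -> int:
    # Stand-in for the pod task defined in pod.py; any single-input task can be mapped.
    return x * 2


@workflow
def fan_out(xs: List[int] = [1, 2, 3]) -> List[int]:
    # map_task runs `double` once per element of `xs`; per-instance retries are
    # attached via TaskMetadata.
    return map_task(double, metadata=TaskMetadata(retries=1))(x=xs)
```

Calling `fan_out()` locally iterates in-process; on a Flyte cluster each mapped instance runs in its own pod, which is the behavior the map-task section of `pod.py` relies on when it supplies a `Pod` task config.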