diff --git a/.github/workflows/update-templates-to-examples.yml b/.github/workflows/update-templates-to-examples.yml index b3745ff1d46..4de1ec57928 100644 --- a/.github/workflows/update-templates-to-examples.yml +++ b/.github/workflows/update-templates-to-examples.yml @@ -50,7 +50,7 @@ jobs: python-version: ${{ inputs.python-version }} stack-name: local ref-zenml: ${{ github.ref }} - ref-template: 2024.01.18 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py + ref-template: 2024.01.22 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py - name: Clean-up run: | rm -rf ./local_checkout @@ -191,7 +191,7 @@ jobs: python-version: ${{ inputs.python-version }} stack-name: local ref-zenml: ${{ github.ref }} - ref-template: 2024.01.12 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py + ref-template: 2024.01.22 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py - name: Clean-up run: | rm -rf ./local_checkout diff --git a/docs/book/user-guide/starter-guide/manage-artifacts.md b/docs/book/user-guide/starter-guide/manage-artifacts.md index ad6a5d6dafd..1e579a65880 100644 --- a/docs/book/user-guide/starter-guide/manage-artifacts.md +++ b/docs/book/user-guide/starter-guide/manage-artifacts.md @@ -150,30 +150,40 @@ if __name__ == "__main__": Optionally, you can configure the `ExternalArtifact` to use a custom [materializer](../advanced-guide/data-management/handle-custom-data-types.md) for your data or disable artifact metadata and visualizations. Check out the [SDK docs](https://sdkdocs.zenml.io/latest/core_code_docs/core-artifacts/#zenml.artifacts.external_artifact.ExternalArtifact) for all available options. +{% hint style="info" %} +Using an `ExternalArtifact` for your step automatically disables caching for the step. 
+{% endhint %} + ### Consuming artifacts produced by other pipelines -It is also common to consume an artifact downstream after producing it in an upstream pipeline or step. As we have learned in the [previous section](fetching-pipelines.md#fetching-artifacts-directly), the `Client` can be used to fetch artifacts directly. However, in ZenML the best practice is not to use the `Client` for this use-case, but rather use the `ExternalArtifact` to pass existing artifacts from other pipeline runs into your steps. This is a more convenient interface: +It is also common to consume an artifact downstream after producing it in an upstream pipeline or step. As we have learned in the [previous section](fetching-pipelines.md#fetching-artifacts-directly), the `Client` can be used to fetch artifacts directly inside the pipeline code: ```python from uuid import UUID import pandas as pd -from zenml import step, pipeline, ExternalArtifact +from zenml import step, pipeline +from zenml.client import Client -@step +@step def trainer(dataset: pd.DataFrame): ... @pipeline def training_pipeline(): + client = Client() # Fetch by ID - dataset_artifact = ExternalArtifact(id=UUID("3a92ae32-a764-4420-98ba-07da8f742b76")) + dataset_artifact = client.get_artifact_version( + name_id_or_prefix=UUID("3a92ae32-a764-4420-98ba-07da8f742b76") + ) # Fetch by name alone - uses the latest version of this artifact - dataset_artifact = ExternalArtifact(name="iris_dataset") + dataset_artifact = client.get_artifact_version(name_id_or_prefix="iris_dataset") # Fetch by name and version - dataset_artifact = ExternalArtifact(name="iris_dataset", version="raw_2023") + dataset_artifact = client.get_artifact_version( + name_id_or_prefix="iris_dataset", version="raw_2023" + ) # Pass into any step trainer(dataset=dataset_artifact) @@ -184,7 +194,7 @@ if __name__ == "__main__": ``` {% hint style="info" %} -Using an `ExternalArtifact` with input data for your step automatically disables caching for the step. 
+Calls of `Client` methods like `get_artifact_version` directly inside the pipeline code make use of ZenML's [late materialization](../advanced-guide/data-management/late-materialization.md) behind the scenes. {% endhint %} ## Managing artifacts **not** produced by ZenML pipelines @@ -327,8 +337,10 @@ import numpy as np from sklearn.base import ClassifierMixin from sklearn.datasets import load_digits from sklearn.svm import SVC -from zenml import ArtifactConfig, ExternalArtifact, pipeline, step, log_artifact_metadata +from zenml import ArtifactConfig, pipeline, step, log_artifact_metadata from zenml import save_artifact, load_artifact +from zenml.client import Client + @step def versioned_data_loader_step() -> ( @@ -349,7 +361,8 @@ def versioned_data_loader_step() -> ( def model_finetuner_step( model: ClassifierMixin, dataset: Tuple[np.ndarray, np.ndarray] ) -> Annotated[ - ClassifierMixin, ArtifactConfig(name="my_model", is_model_artifact=True, tags=["SVC", "trained"]) + ClassifierMixin, + ArtifactConfig(name="my_model", is_model_artifact=True, tags=["SVC", "trained"]), ]: """Finetunes a given model on a given dataset.""" model.fit(dataset[0], dataset[1]) @@ -363,15 +376,20 @@ def model_finetuning_pipeline( dataset_version: Optional[str] = None, model_version: Optional[str] = None, ): + client = Client() # Either load a previous version of "my_dataset" or create a new one if dataset_version: - dataset = ExternalArtifact(name="my_dataset", version=dataset_version) + dataset = client.get_artifact_version( + name_id_or_prefix="my_dataset", version=dataset_version + ) else: dataset = versioned_data_loader_step() # Load the model to finetune # If no version is specified, the latest version of "my_model" is used - model = ExternalArtifact(name="my_model", version=model_version) + model = client.get_artifact_version( + name_id_or_prefix="my_model", version=model_version + ) # Finetune the model # This automatically creates a new version of "my_model" @@ -396,6 +414,7 @@ 
def main(): old_dataset = load_artifact("my_dataset", version="1") latest_trained_model.predict(old_dataset[0]) + if __name__ == "__main__": main() ``` diff --git a/examples/e2e/.copier-answers.yml b/examples/e2e/.copier-answers.yml index e264e69ae54..71f41d1cd31 100644 --- a/examples/e2e/.copier-answers.yml +++ b/examples/e2e/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.01.18 +_commit: 2024.01.22 _src_path: gh:zenml-io/template-e2e-batch data_quality_checks: true email: '' diff --git a/examples/e2e/pipelines/batch_inference.py b/examples/e2e/pipelines/batch_inference.py index 2d5b512da38..fbc772273c5 100644 --- a/examples/e2e/pipelines/batch_inference.py +++ b/examples/e2e/pipelines/batch_inference.py @@ -24,7 +24,7 @@ notify_on_success, ) -from zenml import ExternalArtifact, pipeline +from zenml import get_pipeline_context, pipeline from zenml.integrations.evidently.metrics import EvidentlyMetricConfig from zenml.integrations.evidently.steps import evidently_report_step from zenml.logger import get_logger @@ -43,18 +43,19 @@ def e2e_use_case_batch_inference(): ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### # Link all the steps together by calling them and passing the output # of one step as the input of the next step. 
+ model = get_pipeline_context().model ########## ETL stage ########## df_inference, target, _ = data_loader( - random_state=ExternalArtifact(name="random_state"), is_inference=True + random_state=model.get_artifact("random_state"), is_inference=True ) df_inference = inference_data_preprocessor( dataset_inf=df_inference, - preprocess_pipeline=ExternalArtifact(name="preprocess_pipeline"), + preprocess_pipeline=model.get_artifact("preprocess_pipeline"), target=target, ) ########## DataQuality stage ########## report, _ = evidently_report_step( - reference_dataset=ExternalArtifact(name="dataset_trn"), + reference_dataset=model.get_artifact("dataset_trn"), comparison_dataset=df_inference, ignored_cols=["target"], metrics=[ diff --git a/examples/quickstart/.copier-answers.yml b/examples/quickstart/.copier-answers.yml index fc682b16a21..4c6fe61341c 100644 --- a/examples/quickstart/.copier-answers.yml +++ b/examples/quickstart/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.01.12 +_commit: 2024.01.22 _src_path: gh:zenml-io/template-starter email: '' full_name: ZenML GmbH diff --git a/examples/quickstart/pipelines/training.py b/examples/quickstart/pipelines/training.py index 5d201180119..1e8410c608b 100644 --- a/examples/quickstart/pipelines/training.py +++ b/examples/quickstart/pipelines/training.py @@ -23,7 +23,8 @@ from pipelines import ( feature_engineering, ) -from zenml import ExternalArtifact, pipeline +from zenml import pipeline +from zenml.client import Client from zenml.logger import get_logger logger = get_logger(__name__) @@ -58,8 +59,13 @@ def training( if train_dataset_id is None or test_dataset_id is None: dataset_trn, dataset_tst = feature_engineering() else: - dataset_trn = ExternalArtifact(id=train_dataset_id) - dataset_tst = ExternalArtifact(id=test_dataset_id) + client = Client() + dataset_trn = client.get_artifact_version( + name_id_or_prefix=train_dataset_id + ) + dataset_tst = 
client.get_artifact_version( + name_id_or_prefix=test_dataset_id + ) model = model_trainer( dataset_trn=dataset_trn, target=target, model_type=model_type diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 263675812c8..03ef6b283f2 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -142,7 +142,7 @@ "\n", "import random\n", "import pandas as pd\n", - "from zenml import step, ExternalArtifact, pipeline, Model, get_step_context\n", + "from zenml import step, pipeline, Model, get_step_context\n", "from zenml.client import Client\n", "from zenml.logger import get_logger\n", "from uuid import UUID\n", @@ -602,7 +602,7 @@ "metadata": {}, "source": [ "ZenML allows you to load any version of any dataset that is tracked by the framework\n", - "directly into a pipeline using the `ExternalArtifact` interface. This is very convenient\n", + "directly into a pipeline using the `Client().get_artifact_version` interface. This is very convenient\n", "in this case, as we'd like to send our preprocessed dataset from the older pipeline directly\n", "into the training pipeline." 
] @@ -628,8 +628,8 @@ " dataset_trn, dataset_tst = feature_engineering()\n", " else:\n", " # Load the datasets from an older pipeline\n", - " dataset_trn = ExternalArtifact(id=train_dataset_id)\n", - " dataset_tst = ExternalArtifact(id=test_dataset_id) \n", + " dataset_trn = client.get_artifact_version(name_id_or_prefix=train_dataset_id)\n", + " dataset_tst = client.get_artifact_version(name_id_or_prefix=test_dataset_id) \n", "\n", " trained_model = model_trainer(\n", " dataset_trn=dataset_trn,\n", @@ -981,7 +981,7 @@ " df_inference = inference_preprocessor(\n", " dataset_inf=df_inference,\n", " # We use the preprocess pipeline from the feature engineering pipeline\n", - " preprocess_pipeline=ExternalArtifact(id=preprocess_pipeline_id),\n", + " preprocess_pipeline=client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id),\n", " target=target,\n", " )\n", " inference_predict(\n", diff --git a/src/zenml/artifacts/external_artifact.py b/src/zenml/artifacts/external_artifact.py index 62a80ba82e7..6aecb94cd23 100644 --- a/src/zenml/artifacts/external_artifact.py +++ b/src/zenml/artifacts/external_artifact.py @@ -52,13 +52,6 @@ class ExternalArtifact(ExternalArtifactConfiguration): value: The artifact value. id: The ID of an artifact that should be referenced by this external artifact. - name: Name of an artifact to search. If none of - `version`, `pipeline_run_name`, or `pipeline_name` are set, the - latest version of the artifact will be used. - version: Version of the artifact to search. Only used when `name` is - provided. Cannot be used together with `model`. - model: The model to search in. Only used when `name` - is provided. Cannot be used together with `version`. materializer: The materializer to use for saving the artifact value to the artifact store. Only used when `value` is provided. 
store_artifact_metadata: Whether metadata for the artifact should @@ -91,6 +84,32 @@ def my_pipeline(): @root_validator def _validate_all(cls, values: Dict[str, Any]) -> Dict[str, Any]: + deprecation_msg = ( + "Parameter `{param}` of `ExternalArtifact` will be deprecated " + "in upcoming releases. Please use `{substitute}` instead." + ) + for param, substitute in [ + ["id", "Client().get_artifact_version(name_id_or_prefix=)"], + [ + "name", + "Client().get_artifact_version(name_id_or_prefix=)", + ], + [ + "version", + "Client().get_artifact_version(name_id_or_prefix=,version=)", + ], + [ + "model", + "Client().get_model_version(,).get_artifact(name)", + ], + ]: + if _ := values.get(param, None): + logger.warning( + deprecation_msg.format( + param=param, + substitute=substitute, + ) + ) options = [ values.get(field, None) is not None for field in ["value", "id", "name"] diff --git a/src/zenml/cli/base.py b/src/zenml/cli/base.py index e03e89169d9..9ad31245e1e 100644 --- a/src/zenml/cli/base.py +++ b/src/zenml/cli/base.py @@ -73,11 +73,11 @@ def copier_github_url(self) -> str: ZENML_PROJECT_TEMPLATES = dict( e2e_batch=ZenMLProjectTemplateLocation( github_url="zenml-io/template-e2e-batch", - github_tag="2024.01.18", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml + github_tag="2024.01.22", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml ), starter=ZenMLProjectTemplateLocation( github_url="zenml-io/template-starter", - github_tag="2024.01.12", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml + github_tag="2024.01.22", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml ), nlp=ZenMLProjectTemplateLocation( github_url="zenml-io/template-nlp",