Add deprecation warning for ExternalArtifact non-value features #2375

Merged
Changes from all commits (42 commits)
f04dc8f
generic `Client()` getters lazy loading
avishniakov Jan 19, 2024
6f07068
Auto-update of Starter template
actions-user Jan 19, 2024
5aef578
coderabbitai
avishniakov Jan 19, 2024
1488d8c
Merge branch 'feature/OSSK-356-scalable-lazy-client-methods' of https…
avishniakov Jan 19, 2024
09616c4
Merge branch 'develop' into feature/OSSK-356-scalable-lazy-client-met…
avishniakov Jan 19, 2024
2e43d15
Auto-update of E2E template
actions-user Jan 19, 2024
f6d4ec9
lint, after coderabbitai
avishniakov Jan 19, 2024
d411506
Merge branch 'feature/OSSK-356-scalable-lazy-client-methods' of https…
avishniakov Jan 19, 2024
8900cee
Auto-update of E2E template
actions-user Jan 19, 2024
390477f
Auto-update of NLP template
actions-user Jan 19, 2024
38ca4b2
update test signatures
avishniakov Jan 19, 2024
22d2aa9
Merge branch 'develop' into feature/OSSK-356-scalable-lazy-client-met…
avishniakov Jan 19, 2024
b863709
add `get_model` and `get_model_version` to lazy loaders
avishniakov Jan 19, 2024
7e5399f
update test signature
avishniakov Jan 19, 2024
49de5a5
add `evaluate_all_lazy_load_args`
avishniakov Jan 19, 2024
552b81c
lint up again
avishniakov Jan 19, 2024
f325144
make IDE great again
avishniakov Jan 19, 2024
2841517
Auto-update of Starter template
actions-user Jan 19, 2024
be92a39
lint again with new ruff
avishniakov Jan 19, 2024
1ecb77f
Merge branch 'feature/OSSK-356-scalable-lazy-client-methods' of https…
avishniakov Jan 19, 2024
b774438
DOCS!
avishniakov Jan 19, 2024
1c79213
update toc
avishniakov Jan 19, 2024
4e887e3
update link
avishniakov Jan 19, 2024
e914889
Apply suggestions from code review
avishniakov Jan 19, 2024
97b1f2b
add MCP link
avishniakov Jan 19, 2024
447a853
fix misuse of static methods
avishniakov Jan 22, 2024
f2f9109
fix wrapping/evaluation
avishniakov Jan 22, 2024
3047ea4
Merge branch 'develop' into feature/OSSK-356-scalable-lazy-client-met…
avishniakov Jan 22, 2024
ff49080
fix misuse of static methods
avishniakov Jan 22, 2024
a5e5e21
gentle handle static methods
avishniakov Jan 23, 2024
f20e876
check for ClientLazyLoader instances
avishniakov Jan 23, 2024
2ec55cd
Merge branch 'develop' into feature/OSSK-356-scalable-lazy-client-met…
avishniakov Jan 24, 2024
1841704
Merge branch 'develop' into feature/OSSK-356-scalable-lazy-client-met…
avishniakov Jan 30, 2024
ba87b18
docs update
avishniakov Jan 30, 2024
7d0fcfa
bump templates
avishniakov Jan 30, 2024
723696e
add deprecation note
avishniakov Jan 30, 2024
2a12492
Merge branch 'feature/OSSK-356-scalable-lazy-client-methods' into fea…
avishniakov Jan 30, 2024
14c88ad
Auto-update of Starter template
actions-user Jan 30, 2024
8a751b2
Auto-update of E2E template
actions-user Jan 30, 2024
d614a2a
Auto-update of Starter template
actions-user Jan 31, 2024
a3cb473
Merge branch 'develop' into feature/OSSK-357-deprecate-external-artif…
avishniakov Feb 1, 2024
4d0f6f4
Auto-update of E2E template
actions-user Feb 1, 2024
4 changes: 2 additions & 2 deletions .github/workflows/update-templates-to-examples.yml
@@ -50,7 +50,7 @@ jobs:
python-version: ${{ inputs.python-version }}
stack-name: local
ref-zenml: ${{ github.ref }}
- ref-template: 2024.01.18 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
+ ref-template: 2024.01.22 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
- name: Clean-up
run: |
rm -rf ./local_checkout
@@ -191,7 +191,7 @@ jobs:
python-version: ${{ inputs.python-version }}
stack-name: local
ref-zenml: ${{ github.ref }}
- ref-template: 2024.01.12 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
+ ref-template: 2024.01.22 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
- name: Clean-up
run: |
rm -rf ./local_checkout
41 changes: 30 additions & 11 deletions docs/book/user-guide/starter-guide/manage-artifacts.md
@@ -150,30 +150,40 @@ if __name__ == "__main__":

Optionally, you can configure the `ExternalArtifact` to use a custom [materializer](../advanced-guide/data-management/handle-custom-data-types.md) for your data or disable artifact metadata and visualizations. Check out the [SDK docs](https://sdkdocs.zenml.io/latest/core_code_docs/core-artifacts/#zenml.artifacts.external_artifact.ExternalArtifact) for all available options.

{% hint style="info" %}
Using an `ExternalArtifact` for your step automatically disables caching for the step.
{% endhint %}

### Consuming artifacts produced by other pipelines

- It is also common to consume an artifact downstream after producing it in an upstream pipeline or step. As we have learned in the [previous section](fetching-pipelines.md#fetching-artifacts-directly), the `Client` can be used to fetch artifacts directly. However, in ZenML the best practice is not to use the `Client` for this use-case, but rather use the `ExternalArtifact` to pass existing artifacts from other pipeline runs into your steps. This is a more convenient interface:
+ It is also common to consume an artifact downstream after producing it in an upstream pipeline or step. As we have learned in the [previous section](fetching-pipelines.md#fetching-artifacts-directly), the `Client` can be used to fetch artifacts directly inside the pipeline code:

```python
from uuid import UUID
import pandas as pd
- from zenml import step, pipeline, ExternalArtifact
+ from zenml import step, pipeline
+ from zenml.client import Client


@step
@step
def trainer(dataset: pd.DataFrame):
...

@pipeline
def training_pipeline():
+ client = Client()
# Fetch by ID
- dataset_artifact = ExternalArtifact(id=UUID("3a92ae32-a764-4420-98ba-07da8f742b76"))
+ dataset_artifact = client.get_artifact_version(
+     name_id_or_prefix=UUID("3a92ae32-a764-4420-98ba-07da8f742b76")
+ )

# Fetch by name alone - uses the latest version of this artifact
- dataset_artifact = ExternalArtifact(name="iris_dataset")
+ dataset_artifact = client.get_artifact_version(name_id_or_prefix="iris_dataset")

# Fetch by name and version
- dataset_artifact = ExternalArtifact(name="iris_dataset", version="raw_2023")
+ dataset_artifact = client.get_artifact_version(
+     name_id_or_prefix="iris_dataset", version="raw_2023"
+ )

# Pass into any step
trainer(dataset=dataset_artifact)
@@ -184,7 +194,7 @@ if __name__ == "__main__":
```

{% hint style="info" %}
- Using an `ExternalArtifact` with input data for your step automatically disables caching for the step.
+ Calling `Client` methods like `get_artifact_version` directly inside the pipeline code makes use of ZenML's [late materialization](../advanced-guide/data-management/late-materialization.md) behind the scenes.
{% endhint %}

## Managing artifacts **not** produced by ZenML pipelines
@@ -327,8 +337,10 @@ import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.datasets import load_digits
from sklearn.svm import SVC
- from zenml import ArtifactConfig, ExternalArtifact, pipeline, step, log_artifact_metadata
+ from zenml import ArtifactConfig, pipeline, step, log_artifact_metadata
from zenml import save_artifact, load_artifact
+ from zenml.client import Client


@step
def versioned_data_loader_step() -> (
@@ -349,7 +361,8 @@ def versioned_data_loader_step() -> (
def model_finetuner_step(
model: ClassifierMixin, dataset: Tuple[np.ndarray, np.ndarray]
) -> Annotated[
- ClassifierMixin, ArtifactConfig(name="my_model", is_model_artifact=True, tags=["SVC", "trained"])
+ ClassifierMixin,
+ ArtifactConfig(name="my_model", is_model_artifact=True, tags=["SVC", "trained"]),
]:
"""Finetunes a given model on a given dataset."""
model.fit(dataset[0], dataset[1])
@@ -363,15 +376,20 @@ def model_finetuning_pipeline(
dataset_version: Optional[str] = None,
model_version: Optional[str] = None,
):
+ client = Client()
# Either load a previous version of "my_dataset" or create a new one
if dataset_version:
- dataset = ExternalArtifact(name="my_dataset", version=dataset_version)
+ dataset = client.get_artifact_version(
+     name_id_or_prefix="my_dataset", version=dataset_version
+ )
else:
dataset = versioned_data_loader_step()

# Load the model to finetune
# If no version is specified, the latest version of "my_model" is used
- model = ExternalArtifact(name="my_model", version=model_version)
+ model = client.get_artifact_version(
+     name_id_or_prefix="my_model", version=model_version
+ )

# Finetune the model
# This automatically creates a new version of "my_model"
@@ -396,6 +414,7 @@ def main():
old_dataset = load_artifact("my_dataset", version="1")
latest_trained_model.predict(old_dataset[0])


if __name__ == "__main__":
main()
```
2 changes: 1 addition & 1 deletion examples/e2e/.copier-answers.yml
@@ -1,5 +1,5 @@
# Changes here will be overwritten by Copier
- _commit: 2024.01.18
+ _commit: 2024.01.22
_src_path: gh:zenml-io/template-e2e-batch
data_quality_checks: true
email: ''
9 changes: 5 additions & 4 deletions examples/e2e/pipelines/batch_inference.py
@@ -24,7 +24,7 @@
notify_on_success,
)

- from zenml import ExternalArtifact, pipeline
+ from zenml import get_pipeline_context, pipeline
from zenml.integrations.evidently.metrics import EvidentlyMetricConfig
from zenml.integrations.evidently.steps import evidently_report_step
from zenml.logger import get_logger
@@ -43,18 +43,19 @@ def e2e_use_case_batch_inference():
### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
# Link all the steps together by calling them and passing the output
# of one step as the input of the next step.
+ model = get_pipeline_context().model
########## ETL stage ##########
df_inference, target, _ = data_loader(
- random_state=ExternalArtifact(name="random_state"), is_inference=True
+ random_state=model.get_artifact("random_state"), is_inference=True
)
df_inference = inference_data_preprocessor(
dataset_inf=df_inference,
- preprocess_pipeline=ExternalArtifact(name="preprocess_pipeline"),
+ preprocess_pipeline=model.get_artifact("preprocess_pipeline"),
target=target,
)
########## DataQuality stage ##########
report, _ = evidently_report_step(
- reference_dataset=ExternalArtifact(name="dataset_trn"),
+ reference_dataset=model.get_artifact("dataset_trn"),
comparison_dataset=df_inference,
ignored_cols=["target"],
metrics=[
2 changes: 1 addition & 1 deletion examples/quickstart/.copier-answers.yml
@@ -1,5 +1,5 @@
# Changes here will be overwritten by Copier
- _commit: 2024.01.12
+ _commit: 2024.01.22
_src_path: gh:zenml-io/template-starter
email: ''
full_name: ZenML GmbH
12 changes: 9 additions & 3 deletions examples/quickstart/pipelines/training.py
@@ -23,7 +23,8 @@
from pipelines import (
feature_engineering,
)
- from zenml import ExternalArtifact, pipeline
+ from zenml import pipeline
+ from zenml.client import Client
from zenml.logger import get_logger

logger = get_logger(__name__)
@@ -58,8 +59,13 @@ def training(
if train_dataset_id is None or test_dataset_id is None:
dataset_trn, dataset_tst = feature_engineering()
else:
- dataset_trn = ExternalArtifact(id=train_dataset_id)
- dataset_tst = ExternalArtifact(id=test_dataset_id)
+ client = Client()
+ dataset_trn = client.get_artifact_version(
+     name_id_or_prefix=train_dataset_id
+ )
+ dataset_tst = client.get_artifact_version(
+     name_id_or_prefix=test_dataset_id
+ )

model = model_trainer(
dataset_trn=dataset_trn, target=target, model_type=model_type
10 changes: 5 additions & 5 deletions examples/quickstart/quickstart.ipynb
@@ -142,7 +142,7 @@
"\n",
"import random\n",
"import pandas as pd\n",
- "from zenml import step, ExternalArtifact, pipeline, Model, get_step_context\n",
+ "from zenml import step, pipeline, Model, get_step_context\n",
"from zenml.client import Client\n",
"from zenml.logger import get_logger\n",
"from uuid import UUID\n",
@@ -602,7 +602,7 @@
"metadata": {},
"source": [
"ZenML allows you to load any version of any dataset that is tracked by the framework\n",
- "directly into a pipeline using the `ExternalArtifact` interface. This is very convenient\n",
+ "directly into a pipeline using the `Client().get_artifact_version` interface. This is very convenient\n",
"in this case, as we'd like to send our preprocessed dataset from the older pipeline directly\n",
"into the training pipeline."
]
@@ -628,8 +628,8 @@
" dataset_trn, dataset_tst = feature_engineering()\n",
" else:\n",
" # Load the datasets from an older pipeline\n",
- " dataset_trn = ExternalArtifact(id=train_dataset_id)\n",
- " dataset_tst = ExternalArtifact(id=test_dataset_id) \n",
+ " dataset_trn = client.get_artifact_version(id=train_dataset_id)\n",
+ " dataset_tst = client.get_artifact_version(id=test_dataset_id) \n",
"\n",
" trained_model = model_trainer(\n",
" dataset_trn=dataset_trn,\n",
@@ -981,7 +981,7 @@
" df_inference = inference_preprocessor(\n",
" dataset_inf=df_inference,\n",
" # We use the preprocess pipeline from the feature engineering pipeline\n",
- " preprocess_pipeline=ExternalArtifact(id=preprocess_pipeline_id),\n",
+ " preprocess_pipeline=client.get_artifact_version(id=preprocess_pipeline_id),\n",
" target=target,\n",
" )\n",
" inference_predict(\n",
33 changes: 26 additions & 7 deletions src/zenml/artifacts/external_artifact.py
@@ -52,13 +52,6 @@ class ExternalArtifact(ExternalArtifactConfiguration):
value: The artifact value.
id: The ID of an artifact that should be referenced by this external
artifact.
- name: Name of an artifact to search. If none of
-     `version`, `pipeline_run_name`, or `pipeline_name` are set, the
-     latest version of the artifact will be used.
- version: Version of the artifact to search. Only used when `name` is
-     provided. Cannot be used together with `model`.
- model: The model to search in. Only used when `name`
-     is provided. Cannot be used together with `version`.
materializer: The materializer to use for saving the artifact value
to the artifact store. Only used when `value` is provided.
store_artifact_metadata: Whether metadata for the artifact should
@@ -91,6 +84,32 @@ def my_pipeline():

@root_validator
def _validate_all(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+ deprecation_msg = (
+     "Parameter `{param}` of `ExternalArtifact` will be deprecated "
+     "in upcoming releases. Please use `{substitute}` instead."
+ )
+ for param, substitute in [
+     ["id", "Client().get_artifact_version(name_id_or_prefix=<id>)"],
+     [
+         "name",
+         "Client().get_artifact_version(name_id_or_prefix=<name>)",
+     ],
+     [
+         "version",
+         "Client().get_artifact_version(name_id_or_prefix=<name>,version=<version>)",
+     ],
+     [
+         "model",
+         "Client().get_model_version(<model_name>,<model_version>).get_artifact(name)",
+     ],
+ ]:
+     if _ := values.get(param, None):
+         logger.warning(
+             deprecation_msg.format(
+                 param=param,
+                 substitute=substitute,
+             )
+         )
options = [
    values.get(field, None) is not None
    for field in ["value", "id", "name"]
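The validator change above can be boiled down to a self-contained sketch (not the actual ZenML implementation; the helper name and the substitution table are only modeled on the diff, and where the real code logs via `logger.warning`, this sketch uses `warnings.warn` so the behaviour is easy to observe): each deprecated parameter that is set emits one warning pointing at its `Client`-based replacement.

```python
# Toy reproduction of the deprecation pattern: one warning per legacy
# parameter that is set, each naming its Client-based substitute.
import warnings
from typing import Any, Dict

_SUBSTITUTES = {
    "id": "Client().get_artifact_version(name_id_or_prefix=<id>)",
    "name": "Client().get_artifact_version(name_id_or_prefix=<name>)",
    "version": "Client().get_artifact_version(name_id_or_prefix=<name>, version=<version>)",
    "model": "Client().get_model_version(<model_name>, <model_version>).get_artifact(<name>)",
}


def warn_deprecated_params(values: Dict[str, Any]) -> Dict[str, Any]:
    """Emit one DeprecationWarning per deprecated parameter that is set."""
    for param, substitute in _SUBSTITUTES.items():
        if values.get(param) is not None:
            warnings.warn(
                f"Parameter `{param}` of `ExternalArtifact` will be deprecated "
                f"in upcoming releases. Please use `{substitute}` instead.",
                DeprecationWarning,
                stacklevel=2,
            )
    return values


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    warn_deprecated_params({"name": "iris_dataset", "version": "raw_2023"})
print(len(caught))  # 2
```

Returning `values` unchanged keeps the check non-breaking: existing pipelines keep working, they just see the warning until the parameters are removed in a later release.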
4 changes: 2 additions & 2 deletions src/zenml/cli/base.py
@@ -73,11 +73,11 @@ def copier_github_url(self) -> str:
ZENML_PROJECT_TEMPLATES = dict(
e2e_batch=ZenMLProjectTemplateLocation(
github_url="zenml-io/template-e2e-batch",
- github_tag="2024.01.18", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
+ github_tag="2024.01.22", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
),
starter=ZenMLProjectTemplateLocation(
github_url="zenml-io/template-starter",
- github_tag="2024.01.12", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
+ github_tag="2024.01.22", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
),
nlp=ZenMLProjectTemplateLocation(
github_url="zenml-io/template-nlp",