From b4fe222601fa692a1c5e3779ef9702f4eb1f2728 Mon Sep 17 00:00:00 2001
From: Matthias Richter <matthias.r1092@gmail.com>
Date: Thu, 23 Nov 2023 10:37:32 +0100
Subject: [PATCH 1/4] Update core package (#653)

First PR related to the data structure redesign.

Implements the following:
- New manifest structure (including validation, and evolution)
- New ComponentSpec structure (including validation)
- Removes `Subsets` and `Index`

Not all tests are running successfully yet, but this is already quite a
few changes. Therefore, I've created a PR on the feature branch
`feature/redesign-dataset-format-and-interface` to have quicker
feedback loops.

---------

Co-authored-by: Robbe Sneyders <robbe.sneyders@gmail.com>
Co-authored-by: Philippe Moussalli <philippe.moussalli95@gmail.com>
---
 src/fondant/core/component_spec.py            |  58 +----
 src/fondant/core/manifest.py                  | 239 ++++++++---------
 src/fondant/core/schema.py                    |  31 ++-
 src/fondant/core/schemas/component_spec.json  |  32 +--
 src/fondant/core/schemas/manifest.json        |  17 +-
 .../component_specs/invalid_component.yaml}   |  10 +-
 .../component_specs/kubeflow_component.yaml   |   0
 .../component_specs/valid_component.yaml}     |  21 +-
 .../valid_component_no_args.yaml              |  13 +-
 .../evolution_examples/1/component.yaml}      |  14 +-
 .../evolution_examples/1/output_manifest.json |  36 +++
 .../evolution_examples/2}/component.yaml      |  10 +-
 .../evolution_examples/2/output_manifest.json |  33 +++
 .../evolution_examples/3/component.yaml       |  16 ++
 .../evolution_examples/3/output_manifest.json |  29 +++
 .../evolution_examples/4/component.yaml       |  12 +
 .../evolution_examples/4/output_manifest.json |  29 +++
 .../evolution_examples/input_manifest.json    |  29 +++
 .../examples/manifests/invalid_manifest.json  |  14 +
 .../examples/manifests/valid_manifest.json    |  29 +++
 tests/{ => core}/test_component_specs.py      |  28 +-
 tests/core/test_manifest.py                   | 246 ++++++++++++++++++
 tests/{ => core}/test_manifest_evolution.py   |   9 +-
 tests/{ => core}/test_schema.py               |   0
 .../component_specs/valid_component.yaml      |  29 ---
 .../components/input_manifest.json            |  22 --
 .../evolution_examples/1/output_manifest.json |  46 ----
 .../evolution_examples/2/component.yaml       |  23 --
 .../evolution_examples/2/output_manifest.json |  38 ---
 .../evolution_examples/3/component.yaml       |  24 --
 .../evolution_examples/3/output_manifest.json |  32 ---
 .../evolution_examples/4/output_manifest.json |  38 ---
 .../evolution_examples/5/component.yaml       |  21 --
 .../evolution_examples/5/output_manifest.json |  29 ---
 .../evolution_examples/6/component.yaml       |  22 --
 .../evolution_examples/6/output_manifest.json |  21 --
 .../evolution_examples/7/component.yaml       |  22 --
 .../evolution_examples/7/output_manifest.json |  21 --
 .../evolution_examples/8/output_manifest.json |  35 ---
 .../evolution_examples/input_manifest.json    |  35 ---
 .../manifests/invalid_manifest.json           |  14 -
 .../manifests/valid_manifest.json             |  35 ---
 .../component_1/manifest.json                 |  36 ---
 .../example_component/Dockerfile              |   0
 .../example_component/fondant_component.yaml  |   0
 .../example_data/components/1.yaml            |   0
 .../{ => examples}/example_data/manifest.json |   0
 .../{ => examples}/example_data/raw/split.py  |   0
 .../example_data/raw/testset.parquet          | Bin
 .../subsets_input/index/part.0.parquet        | Bin
 .../subsets_input/index/part.1.parquet        | Bin
 .../subsets_input/index/part.2.parquet        | Bin
 .../subsets_input/properties/part.0.parquet   | Bin
 .../subsets_input/properties/part.1.parquet   | Bin
 .../subsets_input/properties/part.2.parquet   | Bin
 .../subsets_input/types/part.0.parquet        | Bin
 .../subsets_input/types/part.1.parquet        | Bin
 .../subsets_input/types/part.2.parquet        | Bin
 .../example_modules/component.py              |   0
 .../example_modules/invalid_component.py      |   0
 .../invalid_double_components.py              |   0
 .../invalid_double_pipeline.py                |   0
 .../example_modules/pipeline.py               |   0
 .../compiled_pipeline/kubeflow_pipeline.yml   |   0
 .../first_component/fondant_component.yaml    |   0
 .../second_component/fondant_component.yaml   |   0
 .../first_component/fondant_component.yaml    |   0
 .../second_component/fondant_component.yaml   |   0
 .../first_component/fondant_component.yaml    |   0
 .../second_component/fondant_component.yaml   |   0
 .../example_1/first_component/Dockerfile      |   0
 .../first_component/fondant_component.yaml    |   0
 .../example_1/fourth_component/Dockerfile     |   0
 .../fourth_component/fondant_component.yaml   |   0
 .../example_1/second_component/Dockerfile     |   0
 .../second_component/fondant_component.yaml   |   0
 .../example_1/third_component/Dockerfile      |   0
 .../third_component/fondant_component.yaml    |   0
 .../components/arguments/component.yaml       |   0
 .../arguments/component_default_args.yaml     |   0
 .../components/arguments/input_manifest.json  |  14 +-
 .../example_specs/components/component.yaml   |  18 +-
 .../components/input_manifest.json            |  17 ++
 .../example_pipeline/cache/42.txt             |   0
 .../component_1/manifest.json                 |  31 +++
 .../component_2/manifest.json                 |   0
 .../component_1/manifest.json                 |   0
 .../component_2/manifest.json                 |   0
 tests/test_component.py                       |  44 +---
 tests/test_manifest.py                        | 239 -----------------
 90 files changed, 745 insertions(+), 1116 deletions(-)
 rename tests/{example_specs/evolution_examples/4/component.yaml => core/examples/component_specs/invalid_component.yaml} (84%)
 rename tests/{example_specs => core/examples}/component_specs/kubeflow_component.yaml (100%)
 rename tests/{example_specs/evolution_examples/1/component.yaml => core/examples/component_specs/valid_component.yaml} (62%)
 rename tests/{example_specs => core/examples}/component_specs/valid_component_no_args.yaml (59%)
 rename tests/{example_specs/component_specs/invalid_component.yaml => core/examples/evolution_examples/1/component.yaml} (59%)
 create mode 100644 tests/core/examples/evolution_examples/1/output_manifest.json
 rename tests/{example_specs/evolution_examples/8 => core/examples/evolution_examples/2}/component.yaml (69%)
 create mode 100644 tests/core/examples/evolution_examples/2/output_manifest.json
 create mode 100644 tests/core/examples/evolution_examples/3/component.yaml
 create mode 100644 tests/core/examples/evolution_examples/3/output_manifest.json
 create mode 100644 tests/core/examples/evolution_examples/4/component.yaml
 create mode 100644 tests/core/examples/evolution_examples/4/output_manifest.json
 create mode 100644 tests/core/examples/evolution_examples/input_manifest.json
 create mode 100644 tests/core/examples/manifests/invalid_manifest.json
 create mode 100644 tests/core/examples/manifests/valid_manifest.json
 rename tests/{ => core}/test_component_specs.py (85%)
 create mode 100644 tests/core/test_manifest.py
 rename tests/{ => core}/test_manifest_evolution.py (83%)
 rename tests/{ => core}/test_schema.py (100%)
 delete mode 100644 tests/example_specs/component_specs/valid_component.yaml
 delete mode 100644 tests/example_specs/components/input_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/1/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/2/component.yaml
 delete mode 100644 tests/example_specs/evolution_examples/2/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/3/component.yaml
 delete mode 100644 tests/example_specs/evolution_examples/3/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/4/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/5/component.yaml
 delete mode 100644 tests/example_specs/evolution_examples/5/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/6/component.yaml
 delete mode 100644 tests/example_specs/evolution_examples/6/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/7/component.yaml
 delete mode 100644 tests/example_specs/evolution_examples/7/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/8/output_manifest.json
 delete mode 100644 tests/example_specs/evolution_examples/input_manifest.json
 delete mode 100644 tests/example_specs/manifests/invalid_manifest.json
 delete mode 100644 tests/example_specs/manifests/valid_manifest.json
 delete mode 100644 tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
 rename tests/{ => examples}/example_component/Dockerfile (100%)
 rename tests/{ => examples}/example_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_data/components/1.yaml (100%)
 rename tests/{ => examples}/example_data/manifest.json (100%)
 rename tests/{ => examples}/example_data/raw/split.py (100%)
 rename tests/{ => examples}/example_data/raw/testset.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/index/part.0.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/index/part.1.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/index/part.2.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/properties/part.0.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/properties/part.1.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/properties/part.2.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/types/part.0.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/types/part.1.parquet (100%)
 rename tests/{ => examples}/example_data/subsets_input/types/part.2.parquet (100%)
 rename tests/{ => examples}/example_modules/component.py (100%)
 rename tests/{ => examples}/example_modules/invalid_component.py (100%)
 rename tests/{ => examples}/example_modules/invalid_double_components.py (100%)
 rename tests/{ => examples}/example_modules/invalid_double_pipeline.py (100%)
 rename tests/{ => examples}/example_modules/pipeline.py (100%)
 rename tests/{ => examples}/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml (100%)
 rename tests/{ => examples}/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile (100%)
 rename tests/{ => examples}/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml (100%)
 rename tests/{ => examples}/example_specs/components/arguments/component.yaml (100%)
 rename tests/{ => examples}/example_specs/components/arguments/component_default_args.yaml (100%)
 rename tests/{ => examples}/example_specs/components/arguments/input_manifest.json (60%)
 rename tests/{ => examples}/example_specs/components/component.yaml (56%)
 create mode 100644 tests/examples/example_specs/components/input_manifest.json
 rename tests/{ => examples}/example_specs/mock_base_path/example_pipeline/cache/42.txt (100%)
 create mode 100644 tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
 rename tests/{ => examples}/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json (100%)
 rename tests/{ => examples}/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json (100%)
 rename tests/{ => examples}/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json (100%)
 delete mode 100644 tests/test_manifest.py

diff --git a/src/fondant/core/component_spec.py b/src/fondant/core/component_spec.py
index cf177e07c..4dd945568 100644
--- a/src/fondant/core/component_spec.py
+++ b/src/fondant/core/component_spec.py
@@ -66,34 +66,6 @@ def kubeflow_type(self) -> str:
         return lookup[self.type]
 
 
-class ComponentSubset:
-    """
-    Class representing a Fondant Component subset.
-
-    Args:
-        specification: the part of the component json representing the subset
-    """
-
-    def __init__(self, specification: t.Dict[str, t.Any]) -> None:
-        self._specification = specification
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self._specification!r})"
-
-    @property
-    def fields(self) -> t.Mapping[str, Field]:
-        return types.MappingProxyType(
-            {
-                name: Field(name=name, type=Type.from_json(field))
-                for name, field in self._specification["fields"].items()
-            },
-        )
-
-    @property
-    def additional_fields(self) -> bool:
-        return self._specification.get("additionalFields", True)
-
-
 class ComponentSpec:
     """
     Class representing a Fondant component specification.
@@ -190,39 +162,25 @@ def tags(self) -> t.List[str]:
         return self._specification.get("tags", None)
 
     @property
-    def index(self):
-        return ComponentSubset({"fields": {}})
-
-    @property
-    def consumes(self) -> t.Mapping[str, ComponentSubset]:
-        """The subsets consumed by the component as an immutable mapping."""
+    def consumes(self) -> t.Mapping[str, Field]:
+        """The fields consumed by the component as an immutable mapping."""
         return types.MappingProxyType(
             {
-                name: ComponentSubset(subset)
-                for name, subset in self._specification.get("consumes", {}).items()
-                if name != "additionalSubsets"
+                name: Field(name=name, type=Type.from_json(field))
+                for name, field in self._specification.get("consumes", {}).items()
             },
         )
 
     @property
-    def produces(self) -> t.Mapping[str, ComponentSubset]:
-        """The subsets produced by the component as an immutable mapping."""
+    def produces(self) -> t.Mapping[str, Field]:
+        """The fields produced by the component as an immutable mapping."""
         return types.MappingProxyType(
             {
-                name: ComponentSubset(subset)
-                for name, subset in self._specification.get("produces", {}).items()
-                if name != "additionalSubsets"
+                name: Field(name=name, type=Type.from_json(field))
+                for name, field in self._specification.get("produces", {}).items()
             },
         )
 
-    @property
-    def accepts_additional_subsets(self) -> bool:
-        return self._specification.get("consumes", {}).get("additionalSubsets", True)
-
-    @property
-    def outputs_additional_subsets(self) -> bool:
-        return self._specification.get("produces", {}).get("additionalSubsets", True)
-
     @property
     def args(self) -> t.Mapping[str, Argument]:
         args = self.default_arguments
diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py
index 692c4e7cd..fc750620d 100644
--- a/src/fondant/core/manifest.py
+++ b/src/fondant/core/manifest.py
@@ -4,6 +4,7 @@
 import pkgutil
 import types
 import typing as t
+from collections import OrderedDict
 from dataclasses import asdict, dataclass
 from pathlib import Path
 
@@ -18,59 +19,6 @@
 from fondant.core.schema import Field, Type
 
 
-class Subset:
-    """
-    Class representing a Fondant subset.
-
-    Args:
-        specification: The part of the manifest json representing the subset
-        base_path: The base path which the subset location is defined relative to
-    """
-
-    def __init__(self, specification: dict, *, base_path: str) -> None:
-        self._specification = specification
-        self._base_path = base_path
-
-    @property
-    def location(self) -> str:
-        """The absolute location of the subset."""
-        return self._base_path + self._specification["location"]
-
-    @property
-    def fields(self) -> t.Mapping[str, Field]:
-        """The fields of the subset returned as an immutable mapping."""
-        return types.MappingProxyType(
-            {
-                name: Field(name=name, type=Type.from_json(field))
-                for name, field in self._specification["fields"].items()
-            },
-        )
-
-    def add_field(self, name: str, type_: Type, *, overwrite: bool = False) -> None:
-        if not overwrite and name in self._specification["fields"]:
-            msg = f"A field with name {name} already exists"
-            raise ValueError(msg)
-
-        self._specification["fields"][name] = type_.to_json()
-
-    def remove_field(self, name: str) -> None:
-        del self._specification["fields"][name]
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self._specification!r})"
-
-
-class Index(Subset):
-    """Special case of a subset for the index, which has fixed fields."""
-
-    @property
-    def fields(self) -> t.Dict[str, Field]:
-        return {
-            "id": Field(name="id", type=Type("string")),
-            "source": Field(name="source", type=Type("string")),
-        }
-
-
 @dataclass
 class Metadata:
     """
@@ -171,8 +119,8 @@ def create(
 
         specification = {
             "metadata": metadata.to_dict(),
-            "index": {"location": f"/{pipeline_name}/{run_id}/{component_id}/index"},
-            "subsets": {},
+            "index": {"location": f"/{component_id}"},
+            "fields": {},
         }
         return cls(specification)
 
@@ -196,6 +144,10 @@ def copy(self) -> "Manifest":
     def metadata(self) -> t.Dict[str, t.Any]:
         return self._specification["metadata"]
 
+    @property
+    def index(self) -> Field:
+        return Field(name="Index", location=self._specification["index"]["location"])
+
     def update_metadata(self, key: str, value: t.Any) -> None:
         self.metadata[key] = value
 
@@ -203,6 +155,44 @@ def update_metadata(self, key: str, value: t.Any) -> None:
     def base_path(self) -> str:
         return self.metadata["base_path"]
 
+    @property
+    def field_mapping(self) -> t.Mapping[str, t.List[str]]:
+        """
+        Retrieve a mapping of field locations to corresponding field names.
+        A dictionary where keys are field locations and values are lists
+        of column names.
+
+        The method returns an immutable OrderedDict where the first dict element contains the
+        location of the dataframe with the index. This allows an efficient left join operation.
+
+        Example:
+           {
+               "/base_path/component_1": ["Name", "HP"],
+               "/base_path/component_2": ["Type 1", "Type 2"],
+           }
+        """
+        field_mapping = {}
+        for field_name, field in {"id": self.index, **self.fields}.items():
+            location = (
+                f"{self.base_path}/{self.pipeline_name}/{self.run_id}{field.location}"
+            )
+            if location in field_mapping:
+                field_mapping[location].append(field_name)
+            else:
+                field_mapping[location] = [field_name]
+
+        # Sort the field mapping so that the first dataset contains the index
+        sorted_keys = sorted(
+            field_mapping.keys(),
+            key=lambda key: "id" in field_mapping[key],
+            reverse=True,
+        )
+        sorted_field_mapping = OrderedDict(
+            (key, field_mapping[key]) for key in sorted_keys
+        )
+
+        return types.MappingProxyType(sorted_field_mapping)
+
     @property
     def run_id(self) -> str:
         return self.metadata["run_id"]
@@ -220,39 +210,61 @@ def cache_key(self) -> str:
         return self.metadata["cache_key"]
 
     @property
-    def index(self) -> Index:
-        return Index(self._specification["index"], base_path=self.base_path)
-
-    @property
-    def subsets(self) -> t.Mapping[str, Subset]:
-        """The subsets of the manifest as an immutable mapping."""
+    def fields(self) -> t.Mapping[str, Field]:
+        """The fields of the manifest as an immutable mapping."""
         return types.MappingProxyType(
             {
-                name: Subset(subset, base_path=self.base_path)
-                for name, subset in self._specification["subsets"].items()
+                name: Field(
+                    name=name,
+                    type=Type(field["type"]),
+                    location=field["location"],
+                )
+                for name, field in self._specification["fields"].items()
             },
         )
 
-    def add_subset(
-        self,
-        name: str,
-        fields: t.Iterable[t.Union[Field, t.Tuple[str, Type]]],
-    ) -> None:
-        if name in self._specification["subsets"]:
-            msg = f"A subset with name {name} already exists"
+    def add_or_update_field(self, field: Field, overwrite: bool = False):
+        """Add or update field to manifest."""
+        if field.name == "index":
+            self._add_or_update_index(field, overwrite=True)
+        elif overwrite is False and field.name in self._specification["fields"]:
+            msg = (
+                f"A field with name {field.name} already exists. Set overwrite to true, "
+                f"if you want to update the field."
+            )
+            raise ValueError(msg)
+        else:
+            self._specification["fields"][field.name] = {
+                "location": f"/{self.component_id}",
+                **field.type.to_json(),
+            }
+
+    def _add_or_update_index(self, field: Field, overwrite: bool = True):
+        """Add or update the manifest index."""
+        if overwrite is False:
+            msg = (
+                "The index already exists. Set overwrite to true, "
+                "if you want to update the index."
+            )
+            raise ValueError(msg)
+
+        if field.name != "index":
+            msg = (
+                f"The field name is {field.name}. If you try to update the index, set the field"
+                f"name to `index`."
+            )
             raise ValueError(msg)
 
-        self._specification["subsets"][name] = {
-            "location": f"/{self.pipeline_name}/{self.run_id}/{self.component_id}/{name}",
-            "fields": {name: type_.to_json() for name, type_ in fields},
+        self._specification["index"] = {
+            "location": f"/{field.location}",
         }
 
-    def remove_subset(self, name: str) -> None:
-        if name not in self._specification["subsets"]:
-            msg = f"Subset {name} not found in specification"
+    def remove_field(self, name: str) -> None:
+        if name not in self._specification["fields"]:
+            msg = f"Field {name} not found in specification"
             raise ValueError(msg)
 
-        del self._specification["subsets"][name]
+        del self._specification["fields"][name]
 
     def evolve(  # noqa : PLR0912 (too many branches)
         self,
@@ -274,68 +286,23 @@ def evolve(  # noqa : PLR0912 (too many branches)
         # Update `component_id` of the metadata
         component_id = component_spec.component_folder_name
         evolved_manifest.update_metadata(key="component_id", value=component_id)
+
         if run_id is not None:
             evolved_manifest.update_metadata(key="run_id", value=run_id)
 
-        # Update index location as this is currently always rewritten
-        evolved_manifest.index._specification[
-            "location"
-        ] = f"/{self.pipeline_name}/{evolved_manifest.run_id}/{component_id}/index"
-
-        # If additionalSubsets is False in consumes,
-        # Remove all subsets from the manifest that are not listed
-        if not component_spec.accepts_additional_subsets:
-            for subset_name in evolved_manifest.subsets:
-                if subset_name not in component_spec.consumes:
-                    evolved_manifest.remove_subset(subset_name)
-
-        # If additionalSubsets is False in produces,
-        # Remove all subsets from the manifest that are not listed
-        if not component_spec.outputs_additional_subsets:
-            for subset_name in evolved_manifest.subsets:
-                if subset_name not in component_spec.produces:
-                    evolved_manifest.remove_subset(subset_name)
-
-        # If additionalFields is False for a consumed subset,
-        # Remove all fields from that subset that are not listed
-        for subset_name, subset in component_spec.consumes.items():
-            if subset_name in evolved_manifest.subsets and not subset.additional_fields:
-                for field_name in evolved_manifest.subsets[subset_name].fields:
-                    if field_name not in subset.fields:
-                        evolved_manifest.subsets[subset_name].remove_field(
-                            field_name,
-                        )
-
-        # For each output subset defined in the component, add or update it
-        for subset_name, subset in component_spec.produces.items():
-            # Subset is already in manifest, update it
-            if subset_name in evolved_manifest.subsets:
-                # If additional fields are not allowed, remove the fields not defined in the
-                # component spec produces section
-                if not subset.additional_fields:
-                    for field_name in evolved_manifest.subsets[subset_name].fields:
-                        if field_name not in subset.fields:
-                            evolved_manifest.subsets[subset_name].remove_field(
-                                field_name,
-                            )
-
-                # Add fields defined in the component spec produces section
-                # Overwrite to persist changes to the field (eg. type of column)
-                for field in subset.fields.values():
-                    evolved_manifest.subsets[subset_name].add_field(
-                        field.name,
-                        field.type,
-                        overwrite=True,
-                    )
-
-                # Update subset location as this is currently always rewritten
-                evolved_manifest.subsets[subset_name]._specification[
-                    "location"
-                ] = f"/{self.pipeline_name}/{evolved_manifest.run_id}/{component_id}/{subset_name}"
-
-            # Subset is not yet in manifest, add it
-            else:
-                evolved_manifest.add_subset(subset_name, subset.fields.values())
+        # Update index location as this is always rewritten
+        evolved_manifest.add_or_update_field(
+            Field(name="index", location=component_spec.component_folder_name),
+        )
+
+        # TODO handle additionalFields
+
+        # Add or update all produced fields defined in the component spec
+        for name, field in component_spec.produces.items():
+            # If field was not part of the input manifest, add field to output manifest.
+            # If field was part of the input manifest and got produced by the component, update
+            # the manifest field.
+            evolved_manifest.add_or_update_field(field, overwrite=True)
 
         return evolved_manifest
 
diff --git a/src/fondant/core/schema.py b/src/fondant/core/schema.py
index ca9bb0944..dc940b5f7 100644
--- a/src/fondant/core/schema.py
+++ b/src/fondant/core/schema.py
@@ -5,6 +5,7 @@
 import os
 import re
 import typing as t
+from dataclasses import dataclass
 from enum import Enum
 
 import pyarrow as pa
@@ -161,11 +162,33 @@ def __eq__(self, other):
         return False
 
 
-class Field(t.NamedTuple):
-    """Class representing a single field or column in a Fondant subset."""
+class Field:
+    """Class representing a single field or column in a Fondant dataset."""
 
-    name: str
-    type: Type
+    def __init__(
+        self,
+        name: str,
+        type: Type = None,
+        location: str = "",
+    ) -> None:
+        self._name = name
+        self._type = type
+        self._location = location
+
+    @property
+    def name(self) -> str:
+        """The name of the field."""
+        return self._name
+
+    @property
+    def type(self) -> Type:
+        """The type of the field."""
+        return self._type
+
+    @property
+    def location(self) -> str:
+        """The relative location of the field."""
+        return self._location
 
 
 def validate_partition_size(arg_value):
diff --git a/src/fondant/core/schemas/component_spec.json b/src/fondant/core/schemas/component_spec.json
index 8d684a3e5..064ea027d 100644
--- a/src/fondant/core/schemas/component_spec.json
+++ b/src/fondant/core/schemas/component_spec.json
@@ -28,44 +28,16 @@
       }
     },
     "consumes": {
-      "$ref": "#/definitions/subsets"
+      "$ref": "common.json#/definitions/fields"
     },
     "produces": {
-      "$ref": "#/definitions/subsets"
+      "$ref": "common.json#/definitions/fields"
     },
     "args": {
       "$ref": "#/definitions/args"
     }
   },
   "definitions": {
-    "subset": {
-      "type": "object",
-      "properties": {
-        "fields": {
-          "$ref": "common.json#/definitions/fields"
-        },
-        "additionalFields": {
-          "type": "boolean",
-          "default": true
-        }
-      },
-      "required": [
-        "fields"
-      ]
-    },
-    "subsets": {
-      "type": "object",
-      "properties": {
-        "additionalSubsets": {
-          "type": "boolean",
-          "default": true
-        }
-      },
-      "minProperties": 1,
-      "additionalProperties": {
-        "$ref": "#/definitions/subset"
-      }
-    },
     "args": {
       "type": "object",
       "minProperties": 1,
diff --git a/src/fondant/core/schemas/manifest.json b/src/fondant/core/schemas/manifest.json
index 00ad6d1cc..77365dd5f 100644
--- a/src/fondant/core/schemas/manifest.json
+++ b/src/fondant/core/schemas/manifest.json
@@ -37,36 +37,33 @@
         "location"
       ]
     },
-    "subsets": {
-      "$ref": "#/definitions/subsets"
+    "fields": {
+      "$ref": "#/definitions/fields"
     }
   },
   "required": [
     "metadata",
     "index",
-    "subsets"
+    "fields"
   ],
   "definitions": {
-    "subset": {
+    "field": {
       "type": "object",
       "properties": {
         "location": {
           "type": "string",
           "pattern": "/.*"
-        },
-        "fields": {
-          "$ref": "common.json#/definitions/fields"
         }
       },
       "required": [
         "location",
-        "fields"
+        "type"
       ]
     },
-    "subsets": {
+    "fields": {
       "type": "object",
       "additionalProperties": {
-        "$ref": "#/definitions/subset"
+        "$ref": "#/definitions/field"
       }
     }
   }
diff --git a/tests/example_specs/evolution_examples/4/component.yaml b/tests/core/examples/component_specs/invalid_component.yaml
similarity index 84%
rename from tests/example_specs/evolution_examples/4/component.yaml
rename to tests/core/examples/component_specs/invalid_component.yaml
index 067b06da0..d1c88c444 100644
--- a/tests/example_specs/evolution_examples/4/component.yaml
+++ b/tests/core/examples/component_specs/invalid_component.yaml
@@ -7,14 +7,14 @@ consumes:
     fields:
       data:
         type: binary
-  
+
 produces:
-  images:
+  captions:
     fields:
-      encoding:
+      data:
         type: string
 
-args:
+Arguments:
   storage_args:
     description: Storage arguments
-    type: str
+    type: str
\ No newline at end of file
diff --git a/tests/example_specs/component_specs/kubeflow_component.yaml b/tests/core/examples/component_specs/kubeflow_component.yaml
similarity index 100%
rename from tests/example_specs/component_specs/kubeflow_component.yaml
rename to tests/core/examples/component_specs/kubeflow_component.yaml
diff --git a/tests/example_specs/evolution_examples/1/component.yaml b/tests/core/examples/component_specs/valid_component.yaml
similarity index 62%
rename from tests/example_specs/evolution_examples/1/component.yaml
rename to tests/core/examples/component_specs/valid_component.yaml
index 22ae0feb1..1215af1bd 100644
--- a/tests/example_specs/evolution_examples/1/component.yaml
+++ b/tests/core/examples/component_specs/valid_component.yaml
@@ -1,20 +1,21 @@
 name: Example component
 description: This is an example component
 image: example_component:latest
+tags:
+  - Data loading
 
 consumes:
   images:
-    fields:
-      data:
-        type: binary
-  
-produces:
+    type: binary
+
   embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
+    type: array
+    items:
+      type: float32
+
+produces:
+  captions:
+    type: string
 
 args:
   storage_args:
diff --git a/tests/example_specs/component_specs/valid_component_no_args.yaml b/tests/core/examples/component_specs/valid_component_no_args.yaml
similarity index 59%
rename from tests/example_specs/component_specs/valid_component_no_args.yaml
rename to tests/core/examples/component_specs/valid_component_no_args.yaml
index c3adfa6aa..de11cb2ee 100644
--- a/tests/example_specs/component_specs/valid_component_no_args.yaml
+++ b/tests/core/examples/component_specs/valid_component_no_args.yaml
@@ -4,12 +4,13 @@ image: example_component:latest
 
 consumes:
   images:
-    fields:
-      data:
-        type: binary
+    type: binary
+
+  embeddings:
+    type: array
+    items:
+      type: float32
 
 produces:
   captions:
-    fields:
-      data:
-        type: string
\ No newline at end of file
+    type: string
diff --git a/tests/example_specs/component_specs/invalid_component.yaml b/tests/core/examples/evolution_examples/1/component.yaml
similarity index 59%
rename from tests/example_specs/component_specs/invalid_component.yaml
rename to tests/core/examples/evolution_examples/1/component.yaml
index 3fc8128b5..e91ae6f46 100644
--- a/tests/example_specs/component_specs/invalid_component.yaml
+++ b/tests/core/examples/evolution_examples/1/component.yaml
@@ -3,14 +3,16 @@ description: This is an example component
 image: example_component:latest
 
 consumes:
-  images:
-    data: binary
+  images_data:
+    type: binary
 
 produces:
-  captions:
-    data: string
+  embeddings_data:
+    type: array
+    items:
+      type: float32
 
-Arguments:
+args:
   storage_args:
     description: Storage arguments
-    type: str
\ No newline at end of file
+    type: str
diff --git a/tests/core/examples/evolution_examples/1/output_manifest.json b/tests/core/examples/evolution_examples/1/output_manifest.json
new file mode 100644
index 000000000..2a73e5f29
--- /dev/null
+++ b/tests/core/examples/evolution_examples/1/output_manifest.json
@@ -0,0 +1,36 @@
+{
+   "metadata":{
+      "pipeline_name":"test_pipeline",
+      "base_path":"gs://bucket",
+      "run_id":"custom_run_id",
+      "component_id":"example_component"
+   },
+   "index":{
+      "location":"/example_component"
+   },
+   "fields": {
+      "images_width": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_height": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_data": {
+         "type": "binary",
+         "location":"/example_component"
+      },
+      "captions_data": {
+         "type": "binary",
+         "location":"/example_component"
+      },
+      "embeddings_data": {
+        "type": "array",
+        "items": {
+           "type": "float32"
+        },
+        "location":"/example_component"
+      }
+   }
+}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/8/component.yaml b/tests/core/examples/evolution_examples/2/component.yaml
similarity index 69%
rename from tests/example_specs/evolution_examples/8/component.yaml
rename to tests/core/examples/evolution_examples/2/component.yaml
index 5c204b9c2..2352adcb5 100644
--- a/tests/example_specs/evolution_examples/8/component.yaml
+++ b/tests/core/examples/evolution_examples/2/component.yaml
@@ -3,10 +3,12 @@ description: This is an example component
 image: example_component:latest
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
+
+produces:
+  images_encoding:
+    type: string
 
 args:
   storage_args:
diff --git a/tests/core/examples/evolution_examples/2/output_manifest.json b/tests/core/examples/evolution_examples/2/output_manifest.json
new file mode 100644
index 000000000..ca1f6f361
--- /dev/null
+++ b/tests/core/examples/evolution_examples/2/output_manifest.json
@@ -0,0 +1,33 @@
+{
+   "metadata":{
+      "pipeline_name":"test_pipeline",
+      "base_path":"gs://bucket",
+      "run_id":"custom_run_id",
+      "component_id":"example_component"
+   },
+   "index":{
+      "location":"/example_component"
+   },
+   "fields": {
+      "images_width": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_height": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_data": {
+         "type": "binary",
+         "location":"/example_component"
+      },
+      "captions_data": {
+         "type": "binary",
+         "location":"/example_component"
+      },
+      "images_encoding": {
+         "type": "string",
+         "location":"/example_component"
+      }
+   }
+}
\ No newline at end of file
diff --git a/tests/core/examples/evolution_examples/3/component.yaml b/tests/core/examples/evolution_examples/3/component.yaml
new file mode 100644
index 000000000..13b1427b3
--- /dev/null
+++ b/tests/core/examples/evolution_examples/3/component.yaml
@@ -0,0 +1,16 @@
+name: Example component 1
+description: This is an example component
+image: example_component_1:latest
+
+consumes:
+  images_data:
+    type: binary
+
+produces:
+  images_data:
+    type: string
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
diff --git a/tests/core/examples/evolution_examples/3/output_manifest.json b/tests/core/examples/evolution_examples/3/output_manifest.json
new file mode 100644
index 000000000..b11f7d8a3
--- /dev/null
+++ b/tests/core/examples/evolution_examples/3/output_manifest.json
@@ -0,0 +1,29 @@
+{
+   "metadata":{
+      "pipeline_name":"test_pipeline",
+      "base_path":"gs://bucket",
+      "run_id":"custom_run_id",
+      "component_id":"example_component_1"
+   },
+   "index":{
+      "location":"/example_component_1"
+   },
+   "fields": {
+      "images_width": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_height": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_data": {
+         "type": "string",
+         "location":"/example_component_1"
+      },
+      "captions_data": {
+         "type": "binary",
+         "location":"/example_component"
+      }
+   }
+}
\ No newline at end of file
diff --git a/tests/core/examples/evolution_examples/4/component.yaml b/tests/core/examples/evolution_examples/4/component.yaml
new file mode 100644
index 000000000..1b766036d
--- /dev/null
+++ b/tests/core/examples/evolution_examples/4/component.yaml
@@ -0,0 +1,12 @@
+name: Example component 1
+description: This is an example component
+image: example_component_1:latest
+
+consumes:
+  images_data:
+    type: binary
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
diff --git a/tests/core/examples/evolution_examples/4/output_manifest.json b/tests/core/examples/evolution_examples/4/output_manifest.json
new file mode 100644
index 000000000..929c380ab
--- /dev/null
+++ b/tests/core/examples/evolution_examples/4/output_manifest.json
@@ -0,0 +1,29 @@
+{
+   "metadata":{
+      "pipeline_name":"test_pipeline",
+      "base_path":"gs://bucket",
+      "run_id":"custom_run_id",
+      "component_id":"example_component_1"
+   },
+   "index":{
+      "location":"/example_component_1"
+   },
+   "fields": {
+      "images_width": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_height": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_data": {
+         "type": "binary",
+         "location":"/example_component"
+      },
+      "captions_data": {
+         "type": "binary",
+         "location":"/example_component"
+      }
+   }
+}
\ No newline at end of file
diff --git a/tests/core/examples/evolution_examples/input_manifest.json b/tests/core/examples/evolution_examples/input_manifest.json
new file mode 100644
index 000000000..664367cc2
--- /dev/null
+++ b/tests/core/examples/evolution_examples/input_manifest.json
@@ -0,0 +1,29 @@
+{
+   "metadata":{
+      "pipeline_name":"test_pipeline",
+      "base_path":"gs://bucket",
+      "run_id":"12345",
+      "component_id":"example_component"
+   },
+   "index":{
+      "location":"/example_component"
+   },
+   "fields": {
+       "images_width": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_height": {
+         "type": "int32",
+         "location":"/example_component"
+      },
+      "images_data": {
+         "type": "binary",
+         "location":"/example_component"
+      },
+      "captions_data": {
+         "type": "binary",
+         "location":"/example_component"
+      }
+   }
+}
\ No newline at end of file
diff --git a/tests/core/examples/manifests/invalid_manifest.json b/tests/core/examples/manifests/invalid_manifest.json
new file mode 100644
index 000000000..51ec6c5e5
--- /dev/null
+++ b/tests/core/examples/manifests/invalid_manifest.json
@@ -0,0 +1,14 @@
+{
+  "metadata": {
+    "pipeline_name": "test_pipeline",
+    "base_path": "gs://bucket",
+    "run_id": "test_pipeline_12345",
+    "component_id": "67890"
+  },
+  "index": {
+    "location": "/component1"
+  },
+  "fields": {
+    "images": {}
+  }
+}
\ No newline at end of file
diff --git a/tests/core/examples/manifests/valid_manifest.json b/tests/core/examples/manifests/valid_manifest.json
new file mode 100644
index 000000000..0f7c58126
--- /dev/null
+++ b/tests/core/examples/manifests/valid_manifest.json
@@ -0,0 +1,29 @@
+{
+  "metadata": {
+    "pipeline_name": "test_pipeline",
+    "base_path": "gs://bucket",
+    "run_id": "test_pipeline_12345",
+    "component_id": "67890"
+  },
+  "index": {
+    "location": "/component1"
+  },
+  "fields":{
+    "images": {
+      "location": "/component1",
+      "type": "binary"
+    },
+    "height": {
+      "location": "/component2",
+      "type": "int32"
+    },
+    "width": {
+      "location": "/component2",
+      "type": "int32"
+    },
+    "caption": {
+      "location": "/component3",
+      "type": "string"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_component_specs.py b/tests/core/test_component_specs.py
similarity index 85%
rename from tests/test_component_specs.py
rename to tests/core/test_component_specs.py
index caf0344de..dcbf4c2ed 100644
--- a/tests/test_component_specs.py
+++ b/tests/core/test_component_specs.py
@@ -8,13 +8,12 @@
 import yaml
 from fondant.core.component_spec import (
     ComponentSpec,
-    ComponentSubset,
     KubeflowComponentSpec,
 )
 from fondant.core.exceptions import InvalidComponentSpec
 from fondant.core.schema import Type
 
-component_specs_path = Path(__file__).parent / "example_specs/component_specs"
+component_specs_path = Path(__file__).parent / "examples/component_specs"
 
 
 @pytest.fixture()
@@ -49,12 +48,19 @@ def test_component_spec_pkgutil_error(mock_get_data):
 
 
 def test_component_spec_validation(valid_fondant_schema, invalid_fondant_schema):
-    """Test that the manifest is validated correctly on instantiation."""
+    """Test that the component spec is validated correctly on instantiation."""
     ComponentSpec(valid_fondant_schema)
     with pytest.raises(InvalidComponentSpec):
         ComponentSpec(invalid_fondant_schema)
 
 
+def test_component_spec_load_from_file(valid_fondant_schema, invalid_fondant_schema):
+    """Test that the component spec is validated correctly when loaded from file."""
+    ComponentSpec.from_file(component_specs_path / "valid_component.yaml")
+    with pytest.raises(InvalidComponentSpec):
+        ComponentSpec.from_file(component_specs_path / "invalid_component.yaml")
+
+
 def test_attribute_access(valid_fondant_schema):
     """
     Test that attributes can be accessed as expected:
@@ -65,8 +71,8 @@ def test_attribute_access(valid_fondant_schema):
 
     assert fondant_component.name == "Example component"
     assert fondant_component.description == "This is an example component"
-    assert fondant_component.consumes["images"].fields["data"].type == Type("binary")
-    assert fondant_component.consumes["embeddings"].fields["data"].type == Type.list(
+    assert fondant_component.consumes["images"].type == Type("binary")
+    assert fondant_component.consumes["embeddings"].type == Type.list(
         Type("float32"),
     )
 
@@ -129,15 +135,3 @@ def test_kubeflow_component_spec_repr(valid_kubeflow_schema):
     kubeflow_component_spec = KubeflowComponentSpec(valid_kubeflow_schema)
     expected_repr = f"KubeflowComponentSpec({valid_kubeflow_schema!r})"
     assert repr(kubeflow_component_spec) == expected_repr
-
-
-def test_component_subset_repr():
-    """Test that the __repr__ method of ComponentSubset returns the expected string."""
-    component_subset_schema = {
-        "name": "Example subset",
-        "description": "This is an example subset",
-    }
-
-    component_subset = ComponentSubset(component_subset_schema)
-    expected_repr = f"ComponentSubset({component_subset_schema!r})"
-    assert repr(component_subset) == expected_repr
diff --git a/tests/core/test_manifest.py b/tests/core/test_manifest.py
new file mode 100644
index 000000000..0b255b9df
--- /dev/null
+++ b/tests/core/test_manifest.py
@@ -0,0 +1,246 @@
+import json
+import pkgutil
+from collections import OrderedDict
+from pathlib import Path
+
+import pytest
+from fondant.core.component_spec import ComponentSpec
+from fondant.core.exceptions import InvalidManifest
+from fondant.core.manifest import Field, Manifest, Type
+
+manifest_path = Path(__file__).parent / "examples" / "manifests"
+component_specs_path = Path(__file__).parent / "examples" / "component_specs"
+
+
+@pytest.fixture()
+def valid_manifest():
+    with open(manifest_path / "valid_manifest.json") as f:
+        return json.load(f)
+
+
+@pytest.fixture()
+def invalid_manifest():
+    with open(manifest_path / "invalid_manifest.json") as f:
+        return json.load(f)
+
+
+def test_manifest_validation(valid_manifest, invalid_manifest):
+    """Test that the manifest is validated correctly on instantiation."""
+    Manifest(valid_manifest)
+    with pytest.raises(InvalidManifest):
+        Manifest(invalid_manifest)
+
+
+def test_set_base_path(valid_manifest):
+    """Test altering the base path in the manifest."""
+    manifest = Manifest(valid_manifest)
+    tmp_path = "/tmp/base_path"
+    manifest.update_metadata(key="base_path", value=tmp_path)
+
+    assert manifest.base_path == tmp_path
+    assert manifest._specification["metadata"]["base_path"] == tmp_path
+
+
+def test_from_to_file(valid_manifest):
+    """Test reading from and writing to file."""
+    tmp_path = "/tmp/manifest.json"
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump(valid_manifest, f)
+
+    manifest = Manifest.from_file(tmp_path)
+    assert manifest.metadata == valid_manifest["metadata"]
+
+    manifest.to_file(tmp_path)
+    with open(tmp_path, encoding="utf-8") as f:
+        assert json.load(f) == valid_manifest
+
+
+def test_attribute_access(valid_manifest):
+    """
+    Test that attributes can be accessed as expected:
+    - Fixed properties should be accessible as an attribute
+    - Dynamic properties should be accessible by lookup.
+    """
+    manifest = Manifest(valid_manifest)
+
+    assert manifest.metadata == valid_manifest["metadata"]
+    assert manifest.index.location == "/component1"
+    assert manifest.fields["images"].location == "/component1"
+    assert manifest.fields["images"].type == Type("binary")
+
+
+def test_manifest_creation():
+    """Test the stepwise creation of a manifest via the Manifest class."""
+    base_path = "gs://bucket"
+    run_id = "run_id"
+    pipeline_name = "pipeline_name"
+    component_id = "component_id"
+    cache_key = "42"
+
+    manifest = Manifest.create(
+        pipeline_name=pipeline_name,
+        base_path=base_path,
+        run_id=run_id,
+        component_id=component_id,
+        cache_key=cache_key,
+    )
+
+    manifest.add_or_update_field(Field(name="width", type=Type("int32")))
+    manifest.add_or_update_field(Field(name="height", type=Type("int32")))
+    manifest.add_or_update_field(Field(name="data", type=Type("binary")))
+
+    assert manifest._specification == {
+        "metadata": {
+            "pipeline_name": pipeline_name,
+            "base_path": base_path,
+            "run_id": run_id,
+            "component_id": component_id,
+            "cache_key": cache_key,
+        },
+        "index": {"location": f"/{component_id}"},
+        "fields": {
+            "width": {
+                "type": "int32",
+                "location": f"/{component_id}",
+            },
+            "height": {
+                "type": "int32",
+                "location": f"/{component_id}",
+            },
+            "data": {
+                "type": "binary",
+                "location": f"/{component_id}",
+            },
+        },
+    }
+
+
+def test_manifest_repr():
+    manifest = Manifest.create(
+        pipeline_name="NAME",
+        base_path="/",
+        run_id="A",
+        component_id="1",
+        cache_key="42",
+    )
+    assert (
+        manifest.__repr__()
+        == "Manifest({'metadata': {'base_path': '/', 'pipeline_name': 'NAME', 'run_id': 'A',"
+        " 'component_id': '1', 'cache_key': '42'},"
+        " 'index': {'location': '/1'}, 'fields': {}})"
+    )
+
+
+def test_manifest_alteration(valid_manifest):
+    """Test that fields can be added to and removed from an existing manifest."""
+    manifest = Manifest(valid_manifest)
+
+    # test adding new fields
+    manifest.add_or_update_field(Field(name="width2", type=Type("int32")))
+    manifest.add_or_update_field(Field(name="height2", type=Type("int32")))
+
+    assert "width2" in manifest.fields
+    assert "height2" in manifest.fields
+
+    # test adding a duplicate field (without overwrite) is rejected
+    with pytest.raises(ValueError, match="A field with name width2 already exists"):
+        manifest.add_or_update_field(Field(name="width2", type=Type("int32")))
+
+    # test removing a field: check the name that was actually removed
+    manifest.remove_field("width2")
+    assert "width2" not in manifest.fields
+
+    # test removing a nonexistent field
+    with pytest.raises(ValueError, match="Field pictures not found in specification"):
+        manifest.remove_field("pictures")
+
+
+def test_manifest_copy_and_adapt(valid_manifest):
+    """Test that a manifest can be copied and adapted without changing the original."""
+    manifest = Manifest(valid_manifest)
+    new_manifest = manifest.copy()
+    new_manifest.remove_field("images")
+    assert manifest._specification == valid_manifest
+    assert new_manifest._specification != valid_manifest
+
+
+def test_no_validate_schema(monkeypatch, valid_manifest):
+    monkeypatch.setattr(pkgutil, "get_data", lambda package, resource: None)
+    with pytest.raises(FileNotFoundError):
+        Manifest(valid_manifest)
+
+
+def test_evolve_manifest():
+    """Test that the fields are evolved as expected."""
+    run_id = "A"
+    spec = ComponentSpec.from_file(component_specs_path / "valid_component.yaml")
+    input_manifest = Manifest.create(
+        pipeline_name="NAME",
+        base_path="/base_path",
+        run_id=run_id,
+        component_id="component_1",
+        cache_key="42",
+    )
+
+    output_manifest = input_manifest.evolve(component_spec=spec, run_id=run_id)
+
+    assert output_manifest.base_path == input_manifest.base_path
+    assert output_manifest.run_id == run_id
+    assert output_manifest.index.location == "/" + spec.component_folder_name
+    assert output_manifest.fields["captions"].type.name == "string"
+
+
+def test_fields():
+    """Test that fields can be added, updated and removed as expected."""
+    run_id = "A"
+    manifest = Manifest.create(
+        pipeline_name="NAME",
+        base_path="/base_path",
+        run_id=run_id,
+        component_id="component_1",
+        cache_key="42",
+    )
+
+    # add a field
+    manifest.add_or_update_field(Field(name="field_1", type=Type("int32")))
+    assert "field_1" in manifest.fields
+
+    # add a duplicate field, but overwrite (update)
+    manifest.add_or_update_field(
+        Field(name="field_1", type=Type("string")),
+        overwrite=True,
+    )
+    assert manifest.fields["field_1"].type.name == "string"
+
+    # add duplicate field
+    with pytest.raises(
+        ValueError,
+        match="A field with name field_1 already exists. Set overwrite to true, "
+        "if you want to update the field.",
+    ):
+        manifest.add_or_update_field(
+            Field(name="field_1", type=Type("string")),
+            overwrite=False,
+        )
+
+    # delete a field
+    manifest.remove_field(name="field_1")
+    assert "field_1" not in manifest.fields
+
+
+def test_field_mapping(valid_manifest):
+    """Test field mapping generation."""
+    manifest = Manifest(valid_manifest)
+    manifest.add_or_update_field(Field(name="index", location="component2"))
+    field_mapping = manifest.field_mapping
+    assert field_mapping == OrderedDict(
+        {
+            "gs://bucket/test_pipeline/test_pipeline_12345/component2": [
+                "id",
+                "height",
+                "width",
+            ],
+            "gs://bucket/test_pipeline/test_pipeline_12345/component1": ["images"],
+            "gs://bucket/test_pipeline/test_pipeline_12345/component3": ["caption"],
+        },
+    )
diff --git a/tests/test_manifest_evolution.py b/tests/core/test_manifest_evolution.py
similarity index 83%
rename from tests/test_manifest_evolution.py
rename to tests/core/test_manifest_evolution.py
index c79b76aaf..0d9181701 100644
--- a/tests/test_manifest_evolution.py
+++ b/tests/core/test_manifest_evolution.py
@@ -6,7 +6,7 @@
 from fondant.core.component_spec import ComponentSpec
 from fondant.core.manifest import Manifest
 
-examples_path = Path(__file__).parent / "example_specs/evolution_examples"
+examples_path = Path(__file__).parent / "examples/evolution_examples"
 
 
 @pytest.fixture()
@@ -41,7 +41,7 @@ def test_component_spec_location_update():
     with open(examples_path / "input_manifest.json") as f:
         input_manifest = json.load(f)
 
-    with open(examples_path / "7/component.yaml") as f:
+    with open(examples_path / "4/component.yaml") as f:
         specification = yaml.safe_load(f)
 
     manifest = Manifest(input_manifest)
@@ -50,7 +50,4 @@ def test_component_spec_location_update():
         component_spec=component_spec,
     )
 
-    assert (
-        evolved_manifest._specification["subsets"]["images"]["location"]
-        == "/test_pipeline/12345/example_component/images"
-    )
+    assert evolved_manifest.index.location == "/" + component_spec.component_folder_name
diff --git a/tests/test_schema.py b/tests/core/test_schema.py
similarity index 100%
rename from tests/test_schema.py
rename to tests/core/test_schema.py
diff --git a/tests/example_specs/component_specs/valid_component.yaml b/tests/example_specs/component_specs/valid_component.yaml
deleted file mode 100644
index c4b99e837..000000000
--- a/tests/example_specs/component_specs/valid_component.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-tags:
-  - Data loading
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-produces:
-  captions:
-    fields:
-      data:
-        type: string
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
\ No newline at end of file
diff --git a/tests/example_specs/components/input_manifest.json b/tests/example_specs/components/input_manifest.json
deleted file mode 100644
index 7af13d599..000000000
--- a/tests/example_specs/components/input_manifest.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "test_pipeline",
-    "base_path": "/bucket",
-    "run_id": "test_pipeline_12345",
-    "component_id": "67890"
-  },
-  "index": {
-    "location": "/index/12345/example_component"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-
-  }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/1/output_manifest.json b/tests/example_specs/evolution_examples/1/output_manifest.json
deleted file mode 100644
index 17b94c0b0..000000000
--- a/tests/example_specs/evolution_examples/1/output_manifest.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/12345/example_component/images",
-         "fields":{
-            "width":{
-               "type":"int32"
-            },
-            "height":{
-               "type":"int32"
-            },
-            "data":{
-               "type":"binary"
-            }
-         }
-      },
-      "captions":{
-         "location":"/test_pipeline/12345/example_component/captions",
-         "fields":{
-            "data":{
-               "type":"binary"
-            }
-         }
-      },
-      "embeddings":{
-         "location":"/test_pipeline/custom_run_id/example_component/embeddings",
-         "fields":{
-            "data":{
-               "type":"array",
-               "items":{
-                  "type":"float32"
-               }
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/2/component.yaml b/tests/example_specs/evolution_examples/2/component.yaml
deleted file mode 100644
index f37ff99d1..000000000
--- a/tests/example_specs/evolution_examples/2/component.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-  additionalSubsets: false
-  
-produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
diff --git a/tests/example_specs/evolution_examples/2/output_manifest.json b/tests/example_specs/evolution_examples/2/output_manifest.json
deleted file mode 100644
index 3a40b1c9d..000000000
--- a/tests/example_specs/evolution_examples/2/output_manifest.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/12345/example_component/images",
-         "fields":{
-            "width":{
-               "type":"int32"
-            },
-            "height":{
-               "type":"int32"
-            },
-            "data":{
-               "type":"binary"
-            }
-         }
-      },
-      "embeddings":{
-         "location":"/test_pipeline/custom_run_id/example_component/embeddings",
-         "fields":{
-            "data":{
-               "type":"array",
-               "items":{
-                  "type":"float32"
-               }
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/3/component.yaml b/tests/example_specs/evolution_examples/3/component.yaml
deleted file mode 100644
index 6753a083b..000000000
--- a/tests/example_specs/evolution_examples/3/component.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-    additionalFields: false
-  additionalSubsets: false
-  
-produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
diff --git a/tests/example_specs/evolution_examples/3/output_manifest.json b/tests/example_specs/evolution_examples/3/output_manifest.json
deleted file mode 100644
index a9abda6d0..000000000
--- a/tests/example_specs/evolution_examples/3/output_manifest.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/12345/example_component/images",
-         "fields":{
-            "data":{
-               "type":"binary"
-            }
-         }
-      },
-      "embeddings":{
-         "location":"/test_pipeline/custom_run_id/example_component/embeddings",
-         "fields":{
-            "data":{
-               "type":"array",
-               "items":{
-                  "type":"float32"
-               }
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/4/output_manifest.json b/tests/example_specs/evolution_examples/4/output_manifest.json
deleted file mode 100644
index 24af4f2ac..000000000
--- a/tests/example_specs/evolution_examples/4/output_manifest.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/custom_run_id/example_component/images",
-         "fields":{
-            "width":{
-               "type":"int32"
-            },
-            "height":{
-               "type":"int32"
-            },
-            "data":{
-               "type":"binary"
-            },
-            "encoding":{
-               "type":"string"
-            }
-         }
-      },
-      "captions":{
-         "location":"/test_pipeline/12345/example_component/captions",
-         "fields":{
-            "data":{
-               "type":"binary"
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/5/component.yaml b/tests/example_specs/evolution_examples/5/component.yaml
deleted file mode 100644
index 93aaf68b3..000000000
--- a/tests/example_specs/evolution_examples/5/component.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-  
-produces:
-  images:
-    fields:
-      encoding:
-        type: string
-    additionalFields: false
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
diff --git a/tests/example_specs/evolution_examples/5/output_manifest.json b/tests/example_specs/evolution_examples/5/output_manifest.json
deleted file mode 100644
index 8bcf6141d..000000000
--- a/tests/example_specs/evolution_examples/5/output_manifest.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/custom_run_id/example_component/images",
-         "fields":{
-            "encoding":{
-               "type":"string"
-            }
-         }
-      },
-      "captions":{
-         "location":"/test_pipeline/12345/example_component/captions",
-         "fields":{
-            "data":{
-               "type":"binary"
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/6/component.yaml b/tests/example_specs/evolution_examples/6/component.yaml
deleted file mode 100644
index 065061791..000000000
--- a/tests/example_specs/evolution_examples/6/component.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-  
-produces:
-  images:
-    fields:
-      encoding:
-        type: string
-    additionalFields: false
-  additionalSubsets: false
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
diff --git a/tests/example_specs/evolution_examples/6/output_manifest.json b/tests/example_specs/evolution_examples/6/output_manifest.json
deleted file mode 100644
index b7521bf66..000000000
--- a/tests/example_specs/evolution_examples/6/output_manifest.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/custom_run_id/example_component/images",
-         "fields":{
-            "encoding":{
-               "type":"string"
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/7/component.yaml b/tests/example_specs/evolution_examples/7/component.yaml
deleted file mode 100644
index 5746ffa4d..000000000
--- a/tests/example_specs/evolution_examples/7/component.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-  
-produces:
-  images:
-    fields:
-      data:
-        type: string
-    additionalFields: false
-  additionalSubsets: false
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
diff --git a/tests/example_specs/evolution_examples/7/output_manifest.json b/tests/example_specs/evolution_examples/7/output_manifest.json
deleted file mode 100644
index a9eb8a308..000000000
--- a/tests/example_specs/evolution_examples/7/output_manifest.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"custom_run_id",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/custom_run_id/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/custom_run_id/example_component/images",
-         "fields":{
-            "data":{
-               "type":"string"
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/evolution_examples/8/output_manifest.json b/tests/example_specs/evolution_examples/8/output_manifest.json
deleted file mode 100644
index de2621c49..000000000
--- a/tests/example_specs/evolution_examples/8/output_manifest.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "test_pipeline",
-    "base_path": "gs://bucket",
-    "run_id": "custom_run_id",
-    "component_id": "example_component"
-  },
-  "index": {
-    "location": "/test_pipeline/custom_run_id/example_component/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/test_pipeline/12345/example_component/images",
-      "fields": {
-        "width": {
-          "type": "int32"
-        },
-        "height": {
-          "type": "int32"
-        },
-        "data": {
-          "type": "binary"
-        }
-      }
-    },
-    "captions": {
-      "location": "/test_pipeline/12345/example_component/captions",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-  }
-}
diff --git a/tests/example_specs/evolution_examples/input_manifest.json b/tests/example_specs/evolution_examples/input_manifest.json
deleted file mode 100644
index 2ecf37243..000000000
--- a/tests/example_specs/evolution_examples/input_manifest.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
-   "metadata":{
-      "pipeline_name":"test_pipeline",
-      "base_path":"gs://bucket",
-      "run_id":"12345",
-      "component_id":"example_component"
-   },
-   "index":{
-      "location":"/test_pipeline/12345/example_component/index"
-   },
-   "subsets":{
-      "images":{
-         "location":"/test_pipeline/12345/example_component/images",
-         "fields":{
-            "width":{
-               "type":"int32"
-            },
-            "height":{
-               "type":"int32"
-            },
-            "data":{
-               "type":"binary"
-            }
-         }
-      },
-      "captions":{
-         "location":"/test_pipeline/12345/example_component/captions",
-         "fields":{
-            "data":{
-               "type":"binary"
-            }
-         }
-      }
-   }
-}
\ No newline at end of file
diff --git a/tests/example_specs/manifests/invalid_manifest.json b/tests/example_specs/manifests/invalid_manifest.json
deleted file mode 100644
index 3fe8b1097..000000000
--- a/tests/example_specs/manifests/invalid_manifest.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "metadata": {
-    "base_path": "gs://bucket"
-  },
-  "index": {
-    "location": "/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": []
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/example_specs/manifests/valid_manifest.json b/tests/example_specs/manifests/valid_manifest.json
deleted file mode 100644
index 9bc00c512..000000000
--- a/tests/example_specs/manifests/valid_manifest.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "test_pipeline",
-    "base_path": "gs://bucket",
-    "run_id": "test_pipeline_12345",
-    "component_id": "67890"
-  },
-  "index": {
-    "location": "/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        },
-        "height": {
-          "type": "int32"
-        },
-        "width": {
-          "type": "int32"
-        }
-      }
-    },
-    "captions": {
-      "location": "/captions",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json b/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
deleted file mode 100644
index 541775f84..000000000
--- a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "example_pipeline",
-    "base_path": "tests/example_data/subsets_input/mock_base_path",
-    "run_id": "example_pipeline_2023",
-    "component_id": "component_1",
-    "cache_key": "42"
-  },
-  "index": {
-    "location": "/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        },
-        "height": {
-          "type": "int32"
-        },
-        "width": {
-          "type": "int32"
-        }
-      }
-    },
-    "captions": {
-      "location": "/captions",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/example_component/Dockerfile b/tests/examples/example_component/Dockerfile
similarity index 100%
rename from tests/example_component/Dockerfile
rename to tests/examples/example_component/Dockerfile
diff --git a/tests/example_component/fondant_component.yaml b/tests/examples/example_component/fondant_component.yaml
similarity index 100%
rename from tests/example_component/fondant_component.yaml
rename to tests/examples/example_component/fondant_component.yaml
diff --git a/tests/example_data/components/1.yaml b/tests/examples/example_data/components/1.yaml
similarity index 100%
rename from tests/example_data/components/1.yaml
rename to tests/examples/example_data/components/1.yaml
diff --git a/tests/example_data/manifest.json b/tests/examples/example_data/manifest.json
similarity index 100%
rename from tests/example_data/manifest.json
rename to tests/examples/example_data/manifest.json
diff --git a/tests/example_data/raw/split.py b/tests/examples/example_data/raw/split.py
similarity index 100%
rename from tests/example_data/raw/split.py
rename to tests/examples/example_data/raw/split.py
diff --git a/tests/example_data/raw/testset.parquet b/tests/examples/example_data/raw/testset.parquet
similarity index 100%
rename from tests/example_data/raw/testset.parquet
rename to tests/examples/example_data/raw/testset.parquet
diff --git a/tests/example_data/subsets_input/index/part.0.parquet b/tests/examples/example_data/subsets_input/index/part.0.parquet
similarity index 100%
rename from tests/example_data/subsets_input/index/part.0.parquet
rename to tests/examples/example_data/subsets_input/index/part.0.parquet
diff --git a/tests/example_data/subsets_input/index/part.1.parquet b/tests/examples/example_data/subsets_input/index/part.1.parquet
similarity index 100%
rename from tests/example_data/subsets_input/index/part.1.parquet
rename to tests/examples/example_data/subsets_input/index/part.1.parquet
diff --git a/tests/example_data/subsets_input/index/part.2.parquet b/tests/examples/example_data/subsets_input/index/part.2.parquet
similarity index 100%
rename from tests/example_data/subsets_input/index/part.2.parquet
rename to tests/examples/example_data/subsets_input/index/part.2.parquet
diff --git a/tests/example_data/subsets_input/properties/part.0.parquet b/tests/examples/example_data/subsets_input/properties/part.0.parquet
similarity index 100%
rename from tests/example_data/subsets_input/properties/part.0.parquet
rename to tests/examples/example_data/subsets_input/properties/part.0.parquet
diff --git a/tests/example_data/subsets_input/properties/part.1.parquet b/tests/examples/example_data/subsets_input/properties/part.1.parquet
similarity index 100%
rename from tests/example_data/subsets_input/properties/part.1.parquet
rename to tests/examples/example_data/subsets_input/properties/part.1.parquet
diff --git a/tests/example_data/subsets_input/properties/part.2.parquet b/tests/examples/example_data/subsets_input/properties/part.2.parquet
similarity index 100%
rename from tests/example_data/subsets_input/properties/part.2.parquet
rename to tests/examples/example_data/subsets_input/properties/part.2.parquet
diff --git a/tests/example_data/subsets_input/types/part.0.parquet b/tests/examples/example_data/subsets_input/types/part.0.parquet
similarity index 100%
rename from tests/example_data/subsets_input/types/part.0.parquet
rename to tests/examples/example_data/subsets_input/types/part.0.parquet
diff --git a/tests/example_data/subsets_input/types/part.1.parquet b/tests/examples/example_data/subsets_input/types/part.1.parquet
similarity index 100%
rename from tests/example_data/subsets_input/types/part.1.parquet
rename to tests/examples/example_data/subsets_input/types/part.1.parquet
diff --git a/tests/example_data/subsets_input/types/part.2.parquet b/tests/examples/example_data/subsets_input/types/part.2.parquet
similarity index 100%
rename from tests/example_data/subsets_input/types/part.2.parquet
rename to tests/examples/example_data/subsets_input/types/part.2.parquet
diff --git a/tests/example_modules/component.py b/tests/examples/example_modules/component.py
similarity index 100%
rename from tests/example_modules/component.py
rename to tests/examples/example_modules/component.py
diff --git a/tests/example_modules/invalid_component.py b/tests/examples/example_modules/invalid_component.py
similarity index 100%
rename from tests/example_modules/invalid_component.py
rename to tests/examples/example_modules/invalid_component.py
diff --git a/tests/example_modules/invalid_double_components.py b/tests/examples/example_modules/invalid_double_components.py
similarity index 100%
rename from tests/example_modules/invalid_double_components.py
rename to tests/examples/example_modules/invalid_double_components.py
diff --git a/tests/example_modules/invalid_double_pipeline.py b/tests/examples/example_modules/invalid_double_pipeline.py
similarity index 100%
rename from tests/example_modules/invalid_double_pipeline.py
rename to tests/examples/example_modules/invalid_double_pipeline.py
diff --git a/tests/example_modules/pipeline.py b/tests/examples/example_modules/pipeline.py
similarity index 100%
rename from tests/example_modules/pipeline.py
rename to tests/examples/example_modules/pipeline.py
diff --git a/tests/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml b/tests/examples/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml
similarity index 100%
rename from tests/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml
rename to tests/examples/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml
diff --git a/tests/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
rename to tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
diff --git a/tests/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
rename to tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
diff --git a/tests/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
rename to tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
diff --git a/tests/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
rename to tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
diff --git a/tests/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
rename to tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
diff --git a/tests/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
rename to tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
diff --git a/tests/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile
rename to tests/examples/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile
diff --git a/tests/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
rename to tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
diff --git a/tests/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile
rename to tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile
diff --git a/tests/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
rename to tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
diff --git a/tests/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile
rename to tests/examples/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile
diff --git a/tests/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
rename to tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
diff --git a/tests/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile
rename to tests/examples/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile
diff --git a/tests/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
similarity index 100%
rename from tests/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
rename to tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
diff --git a/tests/example_specs/components/arguments/component.yaml b/tests/examples/example_specs/components/arguments/component.yaml
similarity index 100%
rename from tests/example_specs/components/arguments/component.yaml
rename to tests/examples/example_specs/components/arguments/component.yaml
diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/examples/example_specs/components/arguments/component_default_args.yaml
similarity index 100%
rename from tests/example_specs/components/arguments/component_default_args.yaml
rename to tests/examples/example_specs/components/arguments/component_default_args.yaml
diff --git a/tests/example_specs/components/arguments/input_manifest.json b/tests/examples/example_specs/components/arguments/input_manifest.json
similarity index 60%
rename from tests/example_specs/components/arguments/input_manifest.json
rename to tests/examples/example_specs/components/arguments/input_manifest.json
index d98ddd95b..9ee2494f9 100644
--- a/tests/example_specs/components/arguments/input_manifest.json
+++ b/tests/examples/example_specs/components/arguments/input_manifest.json
@@ -7,16 +7,12 @@
     "cache_key": "00"
   },
   "index": {
-    "location": "/index"
+    "location": "/component_1"
   },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
+  "fields": {
+    "data": {
+      "type": "binary",
+      "location": "/component_1"
     }
   }
 }
\ No newline at end of file
diff --git a/tests/example_specs/components/component.yaml b/tests/examples/example_specs/components/component.yaml
similarity index 56%
rename from tests/example_specs/components/component.yaml
rename to tests/examples/example_specs/components/component.yaml
index 19c8d5856..973cc3e6b 100644
--- a/tests/example_specs/components/component.yaml
+++ b/tests/examples/example_specs/components/component.yaml
@@ -3,19 +3,15 @@ description: This is an example component
 image: example_component:latest
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-    additionalFields: false
+  images_data:
+    type: array
+    items:
+      type: float32
+additionalFields: false
 
 
 args:
diff --git a/tests/examples/example_specs/components/input_manifest.json b/tests/examples/example_specs/components/input_manifest.json
new file mode 100644
index 000000000..80fa0b91d
--- /dev/null
+++ b/tests/examples/example_specs/components/input_manifest.json
@@ -0,0 +1,17 @@
+{
+  "metadata": {
+    "pipeline_name": "test_pipeline",
+    "base_path": "/bucket",
+    "run_id": "test_pipeline_12345",
+    "component_id": "67890"
+  },
+  "index": {
+    "location": "/example_component"
+  },
+  "fields": {
+    "data": {
+      "location": "/example_component",
+      "type": "binary"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/example_specs/mock_base_path/example_pipeline/cache/42.txt b/tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt
similarity index 100%
rename from tests/example_specs/mock_base_path/example_pipeline/cache/42.txt
rename to tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt
diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
new file mode 100644
index 000000000..47c2fe949
--- /dev/null
+++ b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
@@ -0,0 +1,31 @@
+{
+  "metadata": {
+    "pipeline_name": "example_pipeline",
+    "base_path": "tests/example_data/subsets_input/mock_base_path",
+    "run_id": "example_pipeline_2023",
+    "component_id": "component_1",
+    "cache_key": "42"
+  },
+  "index": {
+    "location": "/component_1"
+  },
+  "fields":
+  {
+    "data": {
+      "type": "binary",
+      "location": "/component_1"
+    },
+    "height": {
+      "type": "int32",
+      "location": "/component_1"
+    },
+    "width": {
+      "type": "int32",
+      "location": "/component_1"
+    },
+    "captions": {
+      "type": "string",
+      "location": "/component_1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json
similarity index 100%
rename from tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json
rename to tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json
diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json
similarity index 100%
rename from tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json
rename to tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json
diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json
similarity index 100%
rename from tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json
rename to tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json
diff --git a/tests/test_component.py b/tests/test_component.py
index e759bd367..e5dcb3bc3 100644
--- a/tests/test_component.py
+++ b/tests/test_component.py
@@ -377,38 +377,22 @@ def test_wrap_transform():
             "description": "Component for testing",
             "image": "component:test",
             "consumes": {
-                "image": {
-                    "fields": {
-                        "height": {
-                            "type": "int16",
-                        },
-                        "width": {
-                            "type": "int16",
-                        },
-                    },
+                "image_height": {
+                    "type": "int16",
                 },
-                "caption": {
-                    "fields": {
-                        "text": {
-                            "type": "string",
-                        },
-                    },
+                "image_width": {
+                    "type": "int16",
+                },
+                "caption_text": {
+                    "type": "string",
                 },
             },
             "produces": {
-                "caption": {
-                    "fields": {
-                        "text": {
-                            "type": "string",
-                        },
-                    },
+                "caption_text": {
+                    "type": "string",
                 },
-                "image": {
-                    "fields": {
-                        "height": {
-                            "type": "int16",
-                        },
-                    },
+                "image_height": {
+                    "type": "int16",
                 },
             },
         },
@@ -425,9 +409,9 @@ def test_wrap_transform():
     def transform(dataframe: pd.DataFrame) -> pd.DataFrame:
         # Check hierarchical columns
         assert dataframe.columns.tolist() == [
-            ("image", "height"),
-            ("image", "width"),
-            ("caption", "text"),
+            "image_height",
+            "image_width",
+            "caption_text",
         ]
         return dataframe
 
diff --git a/tests/test_manifest.py b/tests/test_manifest.py
deleted file mode 100644
index 3af3ea425..000000000
--- a/tests/test_manifest.py
+++ /dev/null
@@ -1,239 +0,0 @@
-import json
-import pkgutil
-from pathlib import Path
-
-import pytest
-from fondant.core.exceptions import InvalidManifest
-from fondant.core.manifest import Field, Index, Manifest, Subset, Type
-
-manifest_path = Path(__file__).parent / "example_specs/manifests"
-
-
-@pytest.fixture()
-def valid_manifest():
-    with open(manifest_path / "valid_manifest.json") as f:
-        return json.load(f)
-
-
-@pytest.fixture()
-def invalid_manifest():
-    with open(manifest_path / "invalid_manifest.json") as f:
-        return json.load(f)
-
-
-def test_manifest_validation(valid_manifest, invalid_manifest):
-    """Test that the manifest is validated correctly on instantiation."""
-    Manifest(valid_manifest)
-    with pytest.raises(InvalidManifest):
-        Manifest(invalid_manifest)
-
-
-def test_subset_init():
-    """Test initializing a subset."""
-    subset_spec = {
-        "location": "/images/ABC/123",
-        "fields": {
-            "data": {
-                "type": "binary",
-            },
-        },
-    }
-    subset = Subset(specification=subset_spec, base_path="/tmp")
-    assert subset.location == "/tmp/images/ABC/123"
-    assert (
-        subset.__repr__()
-        == "Subset({'location': '/images/ABC/123', 'fields': {'data': {'type': 'binary'}}})"
-    )
-
-
-def test_subset_fields():
-    """Test manipulating subset fields."""
-    subset_spec = {
-        "location": "/images/ABC/123",
-        "fields": {
-            "data": {
-                "type": "binary",
-            },
-        },
-    }
-    subset = Subset(specification=subset_spec, base_path="/tmp")
-
-    # add a field
-    subset.add_field(name="data2", type_=Type("binary"))
-    assert "data2" in subset.fields
-
-    # add a duplicate field
-    with pytest.raises(ValueError, match="A field with name data2 already exists"):
-        subset.add_field(name="data2", type_=Type("binary"))
-
-    # add a duplicate field but overwrite
-    subset.add_field(name="data2", type_=Type("string"), overwrite=True)
-    assert subset.fields["data2"].type == Type("string")
-
-    # remove a field
-    subset.remove_field(name="data2")
-    assert "data2" not in subset.fields
-
-
-def test_set_base_path(valid_manifest):
-    """Test altering the base path in the manifest."""
-    manifest = Manifest(valid_manifest)
-    tmp_path = "/tmp/base_path"
-    manifest.update_metadata(key="base_path", value=tmp_path)
-
-    assert manifest.base_path == tmp_path
-    assert manifest._specification["metadata"]["base_path"] == tmp_path
-
-
-def test_from_to_file(valid_manifest):
-    """Test reading from and writing to file."""
-    tmp_path = "/tmp/manifest.json"
-    with open(tmp_path, "w", encoding="utf-8") as f:
-        json.dump(valid_manifest, f)
-
-    manifest = Manifest.from_file(tmp_path)
-    assert manifest.metadata == valid_manifest["metadata"]
-
-    manifest.to_file(tmp_path)
-    with open(tmp_path, encoding="utf-8") as f:
-        assert json.load(f) == valid_manifest
-
-
-def test_attribute_access(valid_manifest):
-    """
-    Test that attributes can be accessed as expected:
-    - Fixed properties should be accessible as an attribute
-    - Dynamic properties should be accessible by lookup.
-    """
-    manifest = Manifest(valid_manifest)
-
-    assert manifest.metadata == valid_manifest["metadata"]
-    assert manifest.index.location == "gs://bucket/index"
-    assert manifest.subsets["images"].location == "gs://bucket/images"
-    assert manifest.subsets["images"].fields["data"].type == Type("binary")
-
-
-def test_manifest_creation():
-    """Test the stepwise creation of a manifest via the Manifest class."""
-    base_path = "gs://bucket"
-    run_id = "run_id"
-    pipeline_name = "pipeline_name"
-    component_id = "component_id"
-    cache_key = "42"
-
-    manifest = Manifest.create(
-        pipeline_name=pipeline_name,
-        base_path=base_path,
-        run_id=run_id,
-        component_id=component_id,
-        cache_key=cache_key,
-    )
-
-    manifest.add_subset("images", [("width", Type("int32")), ("height", Type("int32"))])
-    manifest.subsets["images"].add_field("data", Type("binary"))
-
-    assert manifest._specification == {
-        "metadata": {
-            "pipeline_name": pipeline_name,
-            "base_path": base_path,
-            "run_id": run_id,
-            "component_id": component_id,
-            "cache_key": cache_key,
-        },
-        "index": {"location": f"/{pipeline_name}/{run_id}/{component_id}/index"},
-        "subsets": {
-            "images": {
-                "location": f"/{pipeline_name}/{run_id}/{component_id}/images",
-                "fields": {
-                    "width": {
-                        "type": "int32",
-                    },
-                    "height": {
-                        "type": "int32",
-                    },
-                    "data": {
-                        "type": "binary",
-                    },
-                },
-            },
-        },
-    }
-
-
-def test_manifest_repr():
-    manifest = Manifest.create(
-        pipeline_name="NAME",
-        base_path="/",
-        run_id="A",
-        component_id="1",
-        cache_key="42",
-    )
-    assert (
-        manifest.__repr__()
-        == "Manifest({'metadata': {'base_path': '/', 'pipeline_name': 'NAME', 'run_id': 'A',"
-        " 'component_id': '1', 'cache_key': '42'},"
-        " 'index': {'location': '/NAME/A/1/index'}, 'subsets': {}})"
-    )
-
-
-def test_manifest_alteration(valid_manifest):
-    """Test alteration functionalities of a manifest via the Manifest class."""
-    manifest = Manifest(valid_manifest)
-
-    # test adding a subset
-    manifest.add_subset(
-        "images2",
-        [("width", Type("int32")), ("height", Type("int32"))],
-    )
-    assert "images2" in manifest.subsets
-
-    # test adding a duplicate subset
-    with pytest.raises(ValueError, match="A subset with name images2 already exists"):
-        manifest.add_subset(
-            "images2",
-            [("width", Type("int32")), ("height", Type("int32"))],
-        )
-
-    # test removing a subset
-    manifest.remove_subset("images2")
-    assert "images2" not in manifest.subsets
-
-    # test removing a nonexistant subset
-    with pytest.raises(ValueError, match="Subset pictures not found in specification"):
-        manifest.remove_subset("pictures")
-
-
-def test_manifest_copy_and_adapt(valid_manifest):
-    """Test that a manifest can be copied and adapted without changing the original."""
-    manifest = Manifest(valid_manifest)
-    new_manifest = manifest.copy()
-    new_manifest.remove_subset("images")
-    assert manifest._specification == valid_manifest
-    assert new_manifest._specification != valid_manifest
-
-
-def test_no_validate_schema(monkeypatch, valid_manifest):
-    monkeypatch.setattr(pkgutil, "get_data", lambda package, resource: None)
-    with pytest.raises(FileNotFoundError):
-        Manifest(valid_manifest)
-
-
-def test_index_fields():
-    """Test that the fields property of Index returns the expected fields."""
-    subset_spec = {
-        "location": "/images/ABC/123",
-        "fields": {
-            "data": {
-                "type": "binary",
-            },
-        },
-    }
-
-    index = Index(specification=subset_spec, base_path="/tmp")
-
-    expected_fields = {
-        "id": Field(name="id", type=Type("string")),
-        "source": Field(name="source", type=Type("string")),
-    }
-
-    assert index.fields == expected_fields

From bb3b623a5a587ef4523c8bf41292e84726e8e902 Mon Sep 17 00:00:00 2001
From: Matthias Richter <matthias.r1092@gmail.com>
Date: Thu, 23 Nov 2023 14:47:54 +0100
Subject: [PATCH 2/4] Refactor component package (#654)

Refactor component package as part of #643

---------

Co-authored-by: Robbe Sneyders <robbe.sneyders@gmail.com>
Co-authored-by: Philippe Moussalli <philippe.moussalli95@gmail.com>
---
 src/fondant/component/data_io.py              | 175 ++++++------------
 src/fondant/component/executor.py             |  34 +---
 src/fondant/core/manifest.py                  |  48 +----
 .../component_specs/arguments/component.yaml  |  68 +++++++
 .../arguments/component_default_args.yaml     |  69 +++++++
 .../arguments/input_manifest.json             |  18 ++
 .../examples/component_specs/component.yaml   |  23 +++
 .../component_specs/input_manifest.json       |  17 ++
 .../component/examples/data/components/1.yaml |  29 +++
 tests/component/examples/data/manifest.json   |  29 +++
 .../component_1/part.0.parquet                | Bin 0 -> 3542 bytes
 .../component_1/part.1.parquet                | Bin 0 -> 3526 bytes
 .../component_1/part.2.parquet                | Bin 0 -> 3584 bytes
 .../component_2/part.0.parquet                | Bin 0 -> 3018 bytes
 .../component_2/part.1.parquet                | Bin 0 -> 3085 bytes
 .../component_2/part.2.parquet                | Bin 0 -> 3066 bytes
 .../example_pipeline/cache/42.txt             |   1 +
 .../component_1/manifest.json                 |  31 ++++
 tests/{ => component}/test_component.py       |   4 +-
 tests/{ => component}/test_data_io.py         | 128 +++++--------
 tests/core/test_manifest.py                   |  19 --
 tests/examples/example_data/raw/split.py      |  10 +-
 22 files changed, 421 insertions(+), 282 deletions(-)
 create mode 100644 tests/component/examples/component_specs/arguments/component.yaml
 create mode 100644 tests/component/examples/component_specs/arguments/component_default_args.yaml
 create mode 100644 tests/component/examples/component_specs/arguments/input_manifest.json
 create mode 100644 tests/component/examples/component_specs/component.yaml
 create mode 100644 tests/component/examples/component_specs/input_manifest.json
 create mode 100644 tests/component/examples/data/components/1.yaml
 create mode 100644 tests/component/examples/data/manifest.json
 create mode 100644 tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.0.parquet
 create mode 100644 tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.1.parquet
 create mode 100644 tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.2.parquet
 create mode 100644 tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.0.parquet
 create mode 100644 tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.1.parquet
 create mode 100644 tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.2.parquet
 create mode 100644 tests/component/examples/mock_base_path/example_pipeline/cache/42.txt
 create mode 100644 tests/component/examples/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
 rename tests/{ => component}/test_component.py (99%)
 rename tests/{ => component}/test_data_io.py (61%)

diff --git a/src/fondant/component/data_io.py b/src/fondant/component/data_io.py
index 7023c1ee2..79a181f8d 100644
--- a/src/fondant/component/data_io.py
+++ b/src/fondant/component/data_io.py
@@ -1,16 +1,19 @@
 import logging
 import os
 import typing as t
+from collections import defaultdict
 
 import dask.dataframe as dd
 from dask.diagnostics import ProgressBar
 from dask.distributed import Client
 
-from fondant.core.component_spec import ComponentSpec, ComponentSubset
+from fondant.core.component_spec import ComponentSpec
 from fondant.core.manifest import Manifest
 
 logger = logging.getLogger(__name__)
 
+DEFAULT_INDEX_NAME = "id"
+
 
 class DataIO:
     def __init__(self, *, manifest: Manifest, component_spec: ComponentSpec) -> None:
@@ -82,73 +85,48 @@ def partition_loaded_dataframe(self, dataframe: dd.DataFrame) -> dd.DataFrame:
 
         return dataframe
 
-    def _load_subset(self, subset_name: str, fields: t.List[str]) -> dd.DataFrame:
+    def load_dataframe(self) -> dd.DataFrame:
         """
-        Function that loads a subset from the manifest as a Dask dataframe.
-
-        Args:
-            subset_name: the name of the subset to load
-            fields: the fields to load from the subset
+        Function that loads the fields defined in the component spec as a single Dask dataframe for
+          the user.
 
         Returns:
-            The subset as a dask dataframe
+            The Dask dataframe with all columns defined in the manifest field mapping
         """
-        subset = self.manifest.subsets[subset_name]
-        remote_path = subset.location
-
-        logger.info(f"Loading subset {subset_name} with fields {fields}...")
+        dataframe = None
+        field_mapping = defaultdict(list)
 
-        subset_df = dd.read_parquet(
-            remote_path,
-            columns=fields,
-            calculate_divisions=True,
+        # Add the index field first to guarantee that reading starts with the index dataframe
+        field_mapping[self.manifest.get_field_location(DEFAULT_INDEX_NAME)].append(
+            DEFAULT_INDEX_NAME,
         )
 
-        # add subset prefix to columns
-        subset_df = subset_df.rename(
-            columns={col: subset_name + "_" + col for col in subset_df.columns},
-        )
+        for field_name in self.component_spec.consumes:
+            location = self.manifest.get_field_location(field_name)
+            field_mapping[location].append(field_name)
 
-        return subset_df
-
-    def _load_index(self) -> dd.DataFrame:
-        """
-        Function that loads the index from the manifest as a Dask dataframe.
-
-        Returns:
-            The index as a dask dataframe
-        """
-        # get index subset from the manifest
-        index = self.manifest.index
-        # get remote path
-        remote_path = index.location
-
-        # load index from parquet, expecting id and source columns
-        return dd.read_parquet(remote_path, calculate_divisions=True)
-
-    def load_dataframe(self) -> dd.DataFrame:
-        """
-        Function that loads the subsets defined in the component spec as a single Dask dataframe for
-          the user.
+        for location, fields in field_mapping.items():
+            if DEFAULT_INDEX_NAME in fields:
+                fields.remove(DEFAULT_INDEX_NAME)
 
-        Returns:
-            The Dask dataframe with the field columns in the format (<subset>_<column_name>)
-                as well as the index columns.
-        """
-        # load index into dataframe
-        dataframe = self._load_index()
-        for name, subset in self.component_spec.consumes.items():
-            fields = list(subset.fields.keys())
-            subset_df = self._load_subset(name, fields)
-            # left joins -> filter on index
-            dataframe = dd.merge(
-                dataframe,
-                subset_df,
-                left_index=True,
-                right_index=True,
-                how="left",
+            partial_df = dd.read_parquet(
+                location,
+                columns=fields,
+                index=DEFAULT_INDEX_NAME,
+                calculate_divisions=True,
             )
 
+            if dataframe is None:
+                # ensure that the index is set correctly and divisions are known.
+                dataframe = partial_df
+            else:
+                dataframe = dataframe.merge(
+                    partial_df,
+                    how="left",
+                    left_index=True,
+                    right_index=True,
+                )
+
         dataframe = self.partition_loaded_dataframe(dataframe)
 
         logging.info(f"Columns of dataframe: {list(dataframe.columns)}")
@@ -170,79 +148,48 @@ def write_dataframe(
         dataframe: dd.DataFrame,
         dask_client: t.Optional[Client] = None,
     ) -> None:
-        write_tasks = []
+        columns_to_produce = [
+            column_name for column_name, field in self.component_spec.produces.items()
+        ]
 
-        dataframe.index = dataframe.index.rename("id")
+        dataframe.index = dataframe.index.rename(DEFAULT_INDEX_NAME)
 
-        # Turn index into an empty dataframe so we can write it
-        index_df = dataframe.index.to_frame().drop(columns=["id"])
-        write_index_task = self._write_subset(
-            index_df,
-            subset_name="index",
-            subset_spec=self.component_spec.index,
-        )
-        write_tasks.append(write_index_task)
+        # Validate that all columns to produce are present in the dataframe
+        self.validate_dataframe_columns(dataframe, columns_to_produce)
 
-        for subset_name, subset_spec in self.component_spec.produces.items():
-            subset_df = self._extract_subset_dataframe(
-                dataframe,
-                subset_name=subset_name,
-                subset_spec=subset_spec,
-            )
-            write_subset_task = self._write_subset(
-                subset_df,
-                subset_name=subset_name,
-                subset_spec=subset_spec,
-            )
-            write_tasks.append(write_subset_task)
+        dataframe = dataframe[columns_to_produce]
+        write_task = self._write_dataframe(dataframe)
 
         with ProgressBar():
             logging.info("Writing data...")
-            # alternative implementation possible: futures = client.compute(...)
-            dd.compute(*write_tasks, scheduler=dask_client)
+            dd.compute(write_task, scheduler=dask_client)
 
     @staticmethod
-    def _extract_subset_dataframe(
-        dataframe: dd.DataFrame,
-        *,
-        subset_name: str,
-        subset_spec: ComponentSubset,
-    ) -> dd.DataFrame:
-        """Create subset dataframe to save with the original field name as the column name."""
-        # Create a new dataframe with only the columns needed for the output subset
-        subset_columns = [f"{subset_name}_{field}" for field in subset_spec.fields]
-        try:
-            subset_df = dataframe[subset_columns]
-        except KeyError as e:
+    def validate_dataframe_columns(dataframe: dd.DataFrame, columns: t.List[str]):
+        """Validates that all columns are available in the dataset."""
+        missing_fields = []
+        for col in columns:
+            if col not in dataframe.columns:
+                missing_fields.append(col)
+
+        if missing_fields:
             msg = (
-                f"Field {e.args[0]} defined in output subset {subset_name} "
+                f"Fields {missing_fields} defined in output dataset "
                 f"but not found in dataframe"
             )
             raise ValueError(
                 msg,
             )
 
-        # Remove the subset prefix from the column names
-        subset_df = subset_df.rename(
-            columns={col: col[(len(f"{subset_name}_")) :] for col in subset_columns},
+    def _write_dataframe(self, dataframe: dd.DataFrame) -> dd.core.Scalar:
+        """Create dataframe writing task."""
+        location = (
+            self.manifest.base_path + "/" + self.component_spec.component_folder_name
         )
-
-        return subset_df
-
-    def _write_subset(
-        self,
-        dataframe: dd.DataFrame,
-        *,
-        subset_name: str,
-        subset_spec: ComponentSubset,
-    ) -> dd.core.Scalar:
-        if subset_name == "index":
-            location = self.manifest.index.location
-        else:
-            location = self.manifest.subsets[subset_name].location
-
-        schema = {field.name: field.type.value for field in subset_spec.fields.values()}
-
+        schema = {
+            field.name: field.type.value
+            for field in self.component_spec.produces.values()
+        }
         return self._create_write_task(dataframe, location=location, schema=schema)
 
     @staticmethod
diff --git a/src/fondant/component/executor.py b/src/fondant/component/executor.py
index 3d4d6097f..d77200da8 100644
--- a/src/fondant/component/executor.py
+++ b/src/fondant/component/executor.py
@@ -491,14 +491,11 @@ def optional_fondant_arguments() -> t.List[str]:
     @staticmethod
     def wrap_transform(transform: t.Callable, *, spec: ComponentSpec) -> t.Callable:
         """Factory that creates a function to wrap the component transform function. The wrapper:
-        - Converts the columns to hierarchical format before passing the dataframe to the
-          transform function
         - Removes extra columns from the returned dataframe which are not defined in the component
           spec `produces` section
         - Sorts the columns from the returned dataframe according to the order in the component
           spec `produces` section to match the order in the `meta` argument passed to Dask's
           `map_partitions`.
-        - Flattens the returned dataframe columns.
 
         Args:
             transform: Transform method to wrap
@@ -506,27 +503,13 @@ def wrap_transform(transform: t.Callable, *, spec: ComponentSpec) -> t.Callable:
         """
 
         def wrapped_transform(dataframe: pd.DataFrame) -> pd.DataFrame:
-            # Switch to hierarchical columns
-            dataframe.columns = pd.MultiIndex.from_tuples(
-                tuple(column.split("_")) for column in dataframe.columns
-            )
-
             # Call transform method
             dataframe = transform(dataframe)
 
             # Drop columns not in specification
-            columns = [
-                (subset_name, field)
-                for subset_name, subset in spec.produces.items()
-                for field in subset.fields
-            ]
-            dataframe = dataframe[columns]
-
-            # Switch to flattened columns
-            dataframe.columns = [
-                "_".join(column) for column in dataframe.columns.to_flat_index()
-            ]
-            return dataframe
+            columns = [name for name, field in spec.produces.items()]
+
+            return dataframe[columns]
 
         return wrapped_transform
 
@@ -552,11 +535,8 @@ def _execute_component(
 
         # Create meta dataframe with expected format
         meta_dict = {"id": pd.Series(dtype="object")}
-        for subset_name, subset in self.spec.produces.items():
-            for field_name, field in subset.fields.items():
-                meta_dict[f"{subset_name}_{field_name}"] = pd.Series(
-                    dtype=pd.ArrowDtype(field.type.value),
-                )
+        for field_name, field in self.spec.produces.items():
+            meta_dict[field_name] = pd.Series(dtype=pd.ArrowDtype(field.type.value))
         meta_df = pd.DataFrame(meta_dict).set_index("id")
 
         wrapped_transform = self.wrap_transform(component.transform, spec=self.spec)
@@ -573,8 +553,10 @@ def _execute_component(
 
         return dataframe
 
+    # TODO: fix in #244
     def _infer_index_change(self) -> bool:
         """Infer if this component changes the index based on its component spec."""
+        """
         if not self.spec.accepts_additional_subsets:
             return True
         if not self.spec.outputs_additional_subsets:
@@ -585,6 +567,8 @@ def _infer_index_change(self) -> bool:
         return any(
             not subset.additional_fields for subset in self.spec.produces.values()
         )
+        """
+        return False
 
 
 class DaskWriteExecutor(Executor[DaskWriteComponent]):
diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py
index fc750620d..013ce2b71 100644
--- a/src/fondant/core/manifest.py
+++ b/src/fondant/core/manifest.py
@@ -4,7 +4,6 @@
 import pkgutil
 import types
 import typing as t
-from collections import OrderedDict
 from dataclasses import asdict, dataclass
 from pathlib import Path
 
@@ -146,7 +145,7 @@ def metadata(self) -> t.Dict[str, t.Any]:
 
     @property
     def index(self) -> Field:
-        return Field(name="Index", location=self._specification["index"]["location"])
+        return Field(name="id", location=self._specification["index"]["location"])
 
     def update_metadata(self, key: str, value: t.Any) -> None:
         self.metadata[key] = value
@@ -155,43 +154,16 @@ def update_metadata(self, key: str, value: t.Any) -> None:
     def base_path(self) -> str:
         return self.metadata["base_path"]
 
-    @property
-    def field_mapping(self) -> t.Mapping[str, t.List[str]]:
-        """
-        Retrieve a mapping of field locations to corresponding field names.
-        A dictionary where keys are field locations and values are lists
-        of column names.
-
-        The method returns an immutable OrderedDict where the first dict element contains the
-        location of the dataframe with the index. This allows an efficient left join operation.
-
-        Example:
-           {
-               "/base_path/component_1": ["Name", "HP"],
-               "/base_path/component_2": ["Type 1", "Type 2"],
-           }
-        """
-        field_mapping = {}
-        for field_name, field in {"id": self.index, **self.fields}.items():
-            location = (
-                f"{self.base_path}/{self.pipeline_name}/{self.run_id}{field.location}"
-            )
-            if location in field_mapping:
-                field_mapping[location].append(field_name)
-            else:
-                field_mapping[location] = [field_name]
-
-        # Sort field mapping that the first dataset contains the index
-        sorted_keys = sorted(
-            field_mapping.keys(),
-            key=lambda key: "id" in field_mapping[key],
-            reverse=True,
-        )
-        sorted_field_mapping = OrderedDict(
-            (key, field_mapping[key]) for key in sorted_keys
-        )
+    def get_field_location(self, field_name: str):
+        """Return absolute path to the field location."""
+        if field_name == "id":
+            return f"{self.base_path}/{self.pipeline_name}/{self.run_id}{self.index.location}"
+        if field_name not in self.fields:
+            msg = f"Field {field_name} is not available in the manifest."
+            raise ValueError(msg)
 
-        return types.MappingProxyType(sorted_field_mapping)
+        field = self.fields[field_name]
+        return f"{self.base_path}/{self.pipeline_name}/{self.run_id}{field.location}"
 
     @property
     def run_id(self) -> str:
diff --git a/tests/component/examples/component_specs/arguments/component.yaml b/tests/component/examples/component_specs/arguments/component.yaml
new file mode 100644
index 000000000..659ed0026
--- /dev/null
+++ b/tests/component/examples/component_specs/arguments/component.yaml
@@ -0,0 +1,68 @@
+name: Example component
+description: This is an example component
+image: example_component:latest
+
+args:
+  string_default_arg:
+    description: default string argument
+    type: str
+    default: foo
+  integer_default_arg:
+    description: default integer argument
+    type: int
+    default: 0
+  float_default_arg:
+    description: default float argument
+    type: float
+    default: 3.14
+  bool_false_default_arg:
+    description: default bool argument
+    type: bool
+    default: False
+  bool_true_default_arg:
+    description: default bool argument
+    type: bool
+    default: True
+  list_default_arg:
+    description: default list argument
+    type: list
+    default: ["foo", "bar"]
+  dict_default_arg:
+    description: default dict argument
+    type: dict
+    default: {"foo":1, "bar":2}
+  string_default_arg_none:
+    description: default string argument
+    type: str
+    default: None
+  integer_default_arg_none:
+    description: default integer argument
+    type: int
+    default: 0
+  float_default_arg_none:
+    description: default float argument
+    type: float
+    default: 0.0
+  bool_default_arg_none:
+    description: default bool argument
+    type: bool
+    default: False
+  list_default_arg_none:
+    description: default list argument
+    type: list
+    default: []
+  dict_default_arg_none:
+    description: default dict argument
+    type: dict
+    default: {}
+  override_default_arg:
+    description: argument with default python value type that can be overridden
+    type: str
+    default: foo
+  override_default_arg_with_none:
+    description: argument with default python type that can be overridden with None
+    type: str
+  optional_arg:
+    description: optional argument
+    type: str
+    default: None
diff --git a/tests/component/examples/component_specs/arguments/component_default_args.yaml b/tests/component/examples/component_specs/arguments/component_default_args.yaml
new file mode 100644
index 000000000..816211c04
--- /dev/null
+++ b/tests/component/examples/component_specs/arguments/component_default_args.yaml
@@ -0,0 +1,69 @@
+name: Example component
+description: This is an example component
+image: example_component:latest
+
+args:
+  string_default_arg:
+    description: default string argument
+    type: str
+    default: foo
+  integer_default_arg:
+    description: default integer argument
+    type: int
+    default: 1
+  float_default_arg:
+    description: default float argument
+    type: float
+    default: 3.14
+  bool_false_default_arg:
+    description: default bool argument
+    type: bool
+    default: False
+  bool_true_default_arg:
+    description: default bool argument
+    type: bool
+    default: True
+  list_default_arg:
+    description: default list argument
+    type: list
+    default: ["foo", "bar"]
+  dict_default_arg:
+    description: default dict argument
+    type: dict
+    default: {"foo":1, "bar":2}
+  string_default_arg_none:
+    description: default string argument
+    type: str
+    default: None
+  integer_default_arg_none:
+    description: default integer argument
+    type: int
+    default: None
+  float_default_arg_none:
+    description: default float argument
+    type: float
+    default: None
+  bool_default_arg_none:
+    description: default bool argument
+    type: bool
+    default: None
+  list_default_arg_none:
+    description: default list argument
+    type: list
+    default: None
+  dict_default_arg_none:
+    description: default dict argument
+    type: dict
+    default: None
+  override_default_arg:
+    description: argument with default python value type that can be overridden
+    type: str
+    default: foo
+  override_default_none_arg:
+    description: argument with default None value type that can be overridden with a valid python type
+    type: float
+    default: None
+  override_default_arg_with_none:
+    description: argument with default python type that can be overridden with None
+    type: str
+
diff --git a/tests/component/examples/component_specs/arguments/input_manifest.json b/tests/component/examples/component_specs/arguments/input_manifest.json
new file mode 100644
index 000000000..9ee2494f9
--- /dev/null
+++ b/tests/component/examples/component_specs/arguments/input_manifest.json
@@ -0,0 +1,18 @@
+{
+  "metadata": {
+    "pipeline_name": "example_pipeline",
+    "base_path": "tests/example_data/subsets_input/mock_base_path",
+    "run_id": "example_pipeline_123",
+    "component_id": "component_1",
+    "cache_key": "00"
+  },
+  "index": {
+    "location": "/component_1"
+  },
+  "fields": {
+    "data": {
+      "type": "binary",
+      "location": "/component_1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/component/examples/component_specs/component.yaml b/tests/component/examples/component_specs/component.yaml
new file mode 100644
index 000000000..973cc3e6b
--- /dev/null
+++ b/tests/component/examples/component_specs/component.yaml
@@ -0,0 +1,23 @@
+name: Example component
+description: This is an example component
+image: example_component:latest
+
+consumes:
+  images_data:
+    type: binary
+
+produces:
+  images_data:
+    type: array
+    items:
+      type: float32
+additionalFields: false
+
+
+args:
+  flag:
+    description: user argument
+    type: str
+  value:
+    description: integer value
+    type: int
diff --git a/tests/component/examples/component_specs/input_manifest.json b/tests/component/examples/component_specs/input_manifest.json
new file mode 100644
index 000000000..80fa0b91d
--- /dev/null
+++ b/tests/component/examples/component_specs/input_manifest.json
@@ -0,0 +1,17 @@
+{
+  "metadata": {
+    "pipeline_name": "test_pipeline",
+    "base_path": "/bucket",
+    "run_id": "test_pipeline_12345",
+    "component_id": "67890"
+  },
+  "index": {
+    "location": "/example_component"
+  },
+  "fields": {
+    "data": {
+      "location": "/example_component",
+      "type": "binary"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/component/examples/data/components/1.yaml b/tests/component/examples/data/components/1.yaml
new file mode 100644
index 000000000..95e5e578f
--- /dev/null
+++ b/tests/component/examples/data/components/1.yaml
@@ -0,0 +1,29 @@
+name: Test component 1
+description: This is an example component
+image: example_component:latest
+
+consumes:
+  Name:
+    type: "string"
+  HP:
+    type: "int32"
+
+  Type 1:
+    type: "string"
+  Type 2:
+    type: "string"
+
+produces:
+  Name:
+    type: "string"
+  HP:
+    type: "int32"
+  Type 1:
+    type: "string"
+  Type 2:
+    type: "string"
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
\ No newline at end of file
diff --git a/tests/component/examples/data/manifest.json b/tests/component/examples/data/manifest.json
new file mode 100644
index 000000000..cc579fef1
--- /dev/null
+++ b/tests/component/examples/data/manifest.json
@@ -0,0 +1,29 @@
+{
+  "metadata": {
+    "pipeline_name": "test_pipeline",
+    "base_path": "tests/component/examples/data",
+    "run_id": "test_pipeline_12345",
+    "component_id": "67890"
+  },
+  "index": {
+    "location": "/component_1"
+  },
+  "fields": {
+    "Name": {
+      "type": "string",
+      "location": "/component_1"
+    },
+    "HP": {
+      "type": "int32",
+      "location": "/component_1"
+    },
+    "Type 1": {
+      "type": "string",
+      "location": "/component_2"
+    },
+    "Type 2": {
+      "type": "string",
+      "location": "/component_2"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.0.parquet b/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.0.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..fa5d96dad64c5e6291eb03909f8542a6af2faf7b
GIT binary patch
literal 3542
zcmcInUu+{s8J|sTXOpvYz9Vba;48-5IoCExZ706gM-IAn;(wPoj&rdcLltJdYkT7L
zu5Y~?$9IZ^Djq686_3CJLe&WhLI{Z`B&aHcctAYx!Ye|61OgrqPZdHH-^@DynnQv@
zE&pa`zVDmw?|id%S*pYZm+((s`1;g_APeX^LT|Y?rV&DETa)ns+vXH33Wp=^N{EM9
zRn=|I8=HY?ri0BM))h54>sK|^(7Bhnke@ulrotsdI}dGQTAIoQ{WWY_#x-9!t>J-X
zkb%lYLm6zTW}m2>DQ>D3?i&i%%BO7$s4Y`fJ)pm)s*2{GnY)j>wvM?}C=CRYXc`xu
zD-)%yjyU3;@fwz8aAlv5yE0e7mW3^MN`wh%!pszR`YJY!A-9II?wPO~)UJ22KH#SP
zk|`Tq?o(o!baAs|bMvsB&`CGE15ph+raBaSUhuiY{p4neD29pk-+k@#+;`lwJOPL1
zIj{ert*Uxr9-&un%p>EP*V`p}oAZV;n%cs|90^_|KRYv5wY5Gun%)(4h*eVu-0saH
zX}7ge-)^<ISyXe+dM!Hr)k4FTu|>nr8JcfqZc9-JIB>(C(+s1>ZH22uQ~R3TQ@Mb@
zj7>b?9=oSqYV+1+9%^Ig*cz9~=owZ=K!PH47NE3#!6SG=chGG&vmm-z{9q^Xvk4ew
z&U5wUuy+lI@^36`{C>53bMt}vO8r~C&;IfFbGKht$TvFQ{l4~>w_+fRQ1ownNbm-e
z$m0)+t`hF4!bCt0{Cp<xCqDYqsVINhP;dbb8WJWD41RbZ1najag?G6P;U#wPe#^NQ
z(KLrvr>9(~?BmhAk9XarloF*Z0OgZ_EkFWr8z2Eb4cGxZ19%be3BY|o7SI4J0qy~|
z0sDX`py1<I2P_hVzC3<{Usa)TmMW1y;fVY*W0?TD{S+!NpMTRyqWfz2HR+2f{>eAr
zu6*xD-wt?VPu}^%lVAVhOF#V0bFca_5B&w%`>QV`xW?I=$d_5}e&7!LeKPu0Z}cD2
zq5s14Q`7ntkMPFiNk6|f>EdV-CukBUA$S+S4e$V_06btCFax*(@B(~*S-@3*A8-wD
z9q=K*96$gB03Qa3fFR%_fR6$~fExf>kdFbvfR6(r09u@zfLj1s2q-Zt<g@U64)8pH
z7J3mt3-<yP$~kff#m7Z>@0k|)6OPDpV;Neg%T!>#uhag^xpVa*3ai)aeBg;Y`seG>
zH?KmyF5CJy9z3mH!HpymJ%eUP?P0-#1n;d0r>SnCjS%=OxPSoXWIQ1X3dq4waP|Mv
zg&*u;jDDc03w%Kg$WcD<?zK6Df(xRj4;MEch(C5cju3b;9ql&_xGMEQ<Zk#g5u!w1
zSqMiQ(mvy>jPp36!wm*9x~*v-(ITp*>|dlgn)j_yp9)57oiq(ajexXn_xht#h9MuQ
zO^ecdaMdf=!XW-QqUs<HuLinz_d#~?rO5H|nKKu?h|v3#YPjIXDfK_~WMG-p;6+bp
zgvSfu&^WqCzI@q|Lwk=MqFZ-XsO-gfPs!)Py<`M3e<>V?_CFe7DKu4V8751!Mr0G4
zBbw#@2%GTYr-lzzb3hE8QsXO2v88wfQV<!NKF3Tf#g~>N$4K-_mC9D_uG5j?JNygu
z6PMN`N!kX`&wWW+k<x7`D@koB(`K}E`T~DQiR-8Q{C37!!^pHSenFCk66}K~Rg&JM
z4_Hgm+7RT^FBW>5-N;o}>WR{!oUdB4zDc%7S_XQzpv4A-Oz}WY9L42YvW08OZlm7O
znxn#<Le_%4$&OsxrSi5?I~r_cB+}?<nw+b&<X+Zl?0{S@8JBZIs%N+d`}gXlmXg<o
zjF&0K<(>xmD_VgJ+n_Hl*VD=RawXZ!QN0q86UB#(S}Er6<C>Dof{se3aA3eMw4x^W
zN{5Xc#6W$@6sy(sJ!>mdOtq>Pe1AmcYs&+2>Q|<?wO5Oe?Kb+2+EKTjIEhCh4;=sB
zx8Kd>$_VnMKh1AhuMXrq_?N%nPhmGxp!RyWRvk35DVzBReiyo(ZKg+0cjR8B(~PHL
zYCWydm}vc!wY_nEF6R$?h?Neu39XG>tH$<fm5!2IKdo77XGrsSW{cLH)#7qaaLv)v
z%w^%sWFMWc(*-+v)oSLo^L6ga`gWq^#Ah|2ey~5bcX~dKQg!g7&?}7^wQNjYE>QpA
zd>!@o^kS!}ciCA0oiHyRDkGMsM5A7Oglj3A<>6pGws9cYn>)o6jrGjOhSt<eeL1nh
zbZnB9qm6@2(sDX2M_b`UmUabQx9G6O1fahp$tXjA(stS|Fd{T1U9gc2w6$_t7dGwE
zR%WzaI<9Sw*J-K4s7*Su{$opt_N=tS6t<~x=+mY{WPHaGI04$)^ghVbeumHXnUeJ^
zb25E$AFz!~1T3*L?oikkChDy3-9uuyk4^ZGVJ$ZM{Y7@6g~u0G*tyDrtMCiK`Jy8H
K@-M?bx_<%jI$mY~

literal 0
HcmV?d00001

diff --git a/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.1.parquet b/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0c86db04d9feade2d44ee9654dea31b673cc5e7d
GIT binary patch
literal 3526
zcmcInO>84c74FX1j-5EO*;U$V3xhmn#sqCL*iM|?3>iq<j{jmiwv+g8cF0zDm)#w=
zyL;O0IL;{2t^jGYTFs6)f;2)1?ZOI;IPL)<v_gUldqkYLfZ)KD1L6X&x}6`BAi=_x
z@>O@ediCC`@71ez%3L+dIGEqLnHSRqMr5d0DC#GU#c7J7QgTPZrs<sB7<5cS)?6Sf
zVr^(jG{cs$(U+0xoL%UT#DUPIXWbc1p=X`1nn>;9ksFlDhTOwQ#~ygk7}`*gO$pQf
z0A~nDl~qjNW6PQ%4^fA{?VR;WvY{|@O9nC}2dv<D1JTRuHWCrMIa92Hy66P;JbkYq
zX*zw2D$+Al*V&|JH&IWYfh7s7D1F_~1}1&;b{%6~R(TN7H-j}<Fb#}(>^utWVbw%}
zra3?7ne!S0tk4e@GFTG_qVpGv83P?*1shI^qW?}|=eOs}nmRI_-wRZcE{muy(H|`A
zD%w!jJl~<cZr;%7-v^7RqvD=yV%W%>+tgJ0z1wMIw6Q5jUlQqu-V~U>^VR}9fXG|3
z?p=(X@3AR7>}YC-{s)!0B{_^6w?1$Tx^yM5D?!vEHfV1ksc56V3GsY+9`#LSME}N{
zMFaI^W*(~@WY7=1JF0w4f1XVN!6z8gZ+o*NU3I>_xosexACW|E%PM5jU?|A1v;^He
z<6>NaHEMC@f+W^K2nX%hFQ=dvY1d1eL2n8LvOicV{9&VfySRrRHs0^O@#oVw7oUpq
zbLqnu%3pr2dqJ5Bzv~GwUVof&&HGtL3H2~D<r94$&iKCN3IBFF?7rTae+iBr#Y|Dq
z&BKGDVEk-~d7myYA?qkUp&c`nho-*hnRZZ%b8d<Q!~kW$Yk(HO4@d(J0IPtFIk!V5
ztG?iw-lUdEfYA2s2k}*7S{F31Zh1Vzy2&9T%||U>BZY}qe`J&6d@1;Zdx!nglSj`Y
zKMus+S^Rh*_B0_n*Glr!kA5}v%}<Dx@JDk2#xZ7PDqFTVdCTGZ?v3ytXT#5D0{?~U
zYpMFai+OS5tZJX#aKy-_R|r*vhB#miKxld$unx4`BN|^N8bA_tAMiE6*8!UVLT&<(
z1f&34077gAkOkxbdB8TH04M@VfE@ryNCi*@>;m=x`+ypt4rl<H0Ftl=fHwdS0p9?S
z#JveXbM6hEFb7SrA0%B9r<Qe}&9Ls+ScjzRsuU(a)mi@G!lkUqqLsDhZr@Lx;g4Sl
z|Mezh?W(VT6Tx*^^B3Y2>*@;<?jgoSG2S~<cFEkK3IT|jaex3$%lJhWDWB*M_&5G<
zHTKya%kXDPpYPW`pBQ%gK6!b8qWnv&OGheP%Iwb^k3+I5;^PBBQwBY?AG#NO5R%2v
z!=+%z79Cjg*qV79QsIsQ9d)26pwX7GA|6c2Y|97cNGFP+fhr4{h(n;Q4to0Nd_v<7
zv0xH;51a6@G7+dh4q+A4;o>E0_xIAvuZK=gFM^qjB1CRXVqqeVbLoE?Nguw_;BYb$
zvcuCQ2xz>!P_}&4lP&ufKeB4BtrFeI{ho_2ZTE^3VE)Q>Y}x<l#G=r^$kYsrW<`dt
zmN6nM>my{qN02x^BEL>Fl}MwjcO!SBA)p{Mc716wb~k!=C3H%$Uanf*so%4!lKq?e
zE2Kra6vuHD0BPGCx5_0tT$<xLT(V=y5{ZfVArWq!&u1%1dknp0#hTA?+>nEH@FYeO
zMe=|#$EAj#pLi|TQwFU}ZM6|A9r4+k$*V<qM^5k{@8*<9KbOoO^0DJ6Uyrv@J>G3K
zBt;nI)^ceR*2X2izEAWAV*R*ZNOE$krzm`;+U9#{v$YF)nRt}X42hj#6V^8yrM8$=
zhSpp%ALV-r*sm%%dDsDaQNEFgH&&`~Aw%qPG9Sx7YSl{-dp@d&nKam_O1VR=gGYIV
z@0E^P8Q24fDVeX;wwmTnGQZxgO~ige^y@2q`8=*<ey3TFj{UauR{gl!h@I_+llScS
zKXu%~N_7N$sn^BLtF=C#h4``)@#OZCIpVK}>a~6=y*{wwgSc~Dsbbkt6B6I6N<wr!
zf*T2i?1|)0Of|>+T;&g9h?EX1GRX~JuSJ^msw8H%&U4n@9TFZde386cxwy&+s@ryi
zOd8Hi`s8w+Cj6*1Q^+cp^Q`k~C04TcXSN`Jus+f}KOe{I4TvMxD~($9bcA2Yk@(<z
z9qUasF9~YbItySE-t$M|$l@v1YUEE)eSKi@aJUsI9CCx=ZhoEY^&-ZWBB&*ukF8oZ
zit_4l;jk#T?Mlm#QaIBkRYAsW(xof~s4tGw%21!AoGLj>2t`Q-R&RBrv@%K;7VVQ#
zwq*OHPZh`G1SeTif%I1WSwoK0EVpYJ>=5Trr-CiC=50?91X$~k`yfl|8CvDS$X414
zGI4evSPLx`@MN8FTVgG-RPFKo`xG10k@0AN&1FH?m#qseIKHrg_EqNJfL{srA60^X
Kv&!%v+&=+gWo-8V

literal 0
HcmV?d00001

diff --git a/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.2.parquet b/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_1/part.2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d226a424901fa9ff3679ae88c43487366d1b2722
GIT binary patch
literal 3584
zcmcInYiuJ|6`o1#tex1bTRhfSM96EJXsafv?IhdHDl0K|;#cg%aT42cx1@I-_S|?L
z-g(4!meBGpEiANMo<gWX0#zU(g!lmnRx4Ea2S^pa_<@l4!4D(^e-P>~&Yf{y%~pa!
zNB-{PoO91P_ndp~j4NC<$#|F-{mgGKl^B_!9;K*Xd2U^zD5|C_mTvHV7_KUkU>Uk7
zd4p_UYPSX3iUt>KV2=SeWAnNu(aUTZ8F~+O=qG3L*iv;(k)&~o3I+p$j5PWKu@V+K
z*s`@YJs(=rTP^IL2-GF%FgU?h<$<gl8vT(lj6_|dpI|ev$dm^31iOv2wxl5XR%{JM
z$n40-I~UIDhAAQXdU#7hqJikya86f5N%Ce`pmea-_QpaMG6pssTiJqviX|VoTQ=rm
zWvqICbA3w~Sh8ezfLI)8eOj4%B!qbT@Q_Z2%BZcPsgY0xYbN%NhqsY!=%#m#MQz+c
zMwbpu<_5?>qHfaHu9r~PK&BTItzgU2X@6i<I*`22g=?s*8<PKTfg(gRIN{4G=#faM
zq8o#@|4Mj6McTmfem?{Q)AGL*UIX*6-1d)qx+eY8$2bY1js$C<c>fXBbVEUXdTNq0
zEG*cXPJbiZL|x*+m2esK8<KZ|T``b_HGJhKF&MRV4Phf1^kGY)ZzRf6&+6%;BnVQE
zp%{^oXCUbzW{eq|yhdFebJelqr*=U$^~MOaGCg+r!?C~$n#_NDru66K%46ki>6ylN
z)mwi#diLsF5r0em-Vc<&-tW%*4y31l=$~W)(M4)36lFc@NR^n8h#dLTc;t2e^gl06
z``*c1bOw$i#f(tUgrK7+m_I+t{E;p(3Fmk|pgmV9KTR$BFL|iFppQxcng9+^3Hm&k
zaNi=_4+7;Wz$_pOSObIsp8zBP9|F7vcpPvO@O03(ybmVHBf1^&qsBB(!^HaD8gf`)
zcsPnsS06$J<`b{Gq4Qpjd6K_mJz=x5RG58r<k|1sd-I$9$}g%#@o~1``}&XnFaa_9
z+WRIM&oE{q`HB;<zk4EYj!wT3nEv(n<bPrMp<#S5#@ru0uJYGLJt7H|L;}@<&9?zE
z00Z^`9e@H*0UAIDbOE;k2Ec?+TSS9RG#mh>2j~L^fJ49?z()Z`fR6#513VA-IDn+!
zlYmbFJ`MN`;In`i0G|WA2zUwbdB7I{cL84nd<pO};1$5DfO~*1L)yMFOvP7+{o7AB
z>pSMKzT?!Av|Xfvf_HV|?=fexheaoQ_kEEE-svB`fBN^AA$u2Xy@Ll2%U-m!NU>w4
zAWJG@#waE*HR2Z56jho8pBWDj;M5EUSfnCybTYd9f2_p!_JB^mrvZ%IWh3&mFY>cT
zp=hHs>{u6RA~M;Zd+x-sCQAK1L04>5Gvha6Psgzse`Y2YcS(B=UvfBi;u_p#Afwre
z0un7ODdOIFnyY!w8gz+Z+}5xFug*9~Yqr`QoG^5LA6~13u1XdXk%d6~PF&JJ9Nu1J
z?ap>~_Imv2=+v3>Uc|}GN;I7F<AnO3dSY4zF?ik+vcsbpaA>%@c)oJcl1qD=9jsZ`
z7KrTm{hpA|Z1;i@i2Q}^xU~P#h!a9XLh#YzgjvD70Urk>%3XMo!<P#&d>|Pn)-^&+
zF3ct7l5q$@d}#U%Gc}i-n~xt+Y=Em)H|jUsx@7<5dy+g!ZiVBxO#pe;IBtPUx4A6G
zwYf~&p{3L3_&q{gJ>lm!GwvK(M~lNRa9odrb<h(f=`!hH&T%U}kWW8VP!+qGt1UEA
z>j!+kX7O4XZ{Rc!^iDxZn1xJnpHKCZe0{Nn>WiIbLso=A;aVYU!P-TcukR3fTdeoZ
zQij7#RZ;j{wZ*GhtGNYoxy2-(>k&P@U0A={SZ|4WrRVT6#U!sPpuegVaIX#el6)h*
z*qE;_3OS;e!+ffEyIEgPxcsCd=CYupDi`+kwlpXzyt;nS%)uUrPnlw^wz_L=WQt3z
z+Bx43iF|$D#3z1biW|H2<j`)j+pPCHjnwgeIK1up|E~QO=BopUm-aBfd97yhdGIfP
z&Y!|gra<hes9rOh*(KZY5Bx54<V{D9mX>+7DhtV_gw#kYWKSf2;>zwYJ{RK$J|x!n
zH!;Z#U#}&0>s49It)AqpwbdhWJhesg?&RWPOi<m`BjmDhX0nH8^K{ORR<ne>ayHLh
zUfWEqyZf`6kUv<T*gZKP{iO!@QBc<h&3ZP$&liY)aK8H8U9Bh!TE{sHpcBT$199NQ
zDb;Kg4^e%|cH*$Vnkemac6qC~MD}{>V^a~d^)8=UaCDUMLcg?M#x1wfa-<ZFWl2?#
zd5bhFjsVmb$LSTQPf|{s1&0VlNhVI~G^Dh0N(UD0kWzMNJEUDH59eu4cBlero%(a8
z9I07u%Td@S#-UCHm+0_aOW*`(Ym@sRPwE+-%~K_-S;xur@qOSdbVR_CbH-hYv&0c~
s=QnRsY^;ln+qPuQ3f=CkbD_nC7go%@%A(8go#6hc3j7nQz<<Ag1I5da;Q#;t

literal 0
HcmV?d00001

diff --git a/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.0.parquet b/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.0.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..80c4500be6f2dfaff4c3186d78f72926d6af77c7
GIT binary patch
literal 3018
zcmcImOKcle6dgN`>!c}7LdV#}fEiLZK$7|shotJFV>@>2I&R{|9^14^=VyDI@!0hz
zj#EWq*#)XtvSLx83bA0xk_BRi!~(Hk!Kz5GVB19(EI98?;yP(6fmB95^SSrF^X@(G
z-6wDG*#K)}zp}88EopX|Wu_Ts>-s3eFo~AfZrkt@*IKG&YD*A1YHI}Fk#5C~N2bxL
zi@Iezw$aeq4Z{J`s;<hNmL^-A%ZXN_Ybb;#E^4hl%dl%~dXAOYN!HHVJxk2&C?#}_
zfDTbJpIUL3EVlE^<P;YSg`%-oELZO3a=8anu^i(Mk*R&ZSv~B8JIvT8-7fo9zo~iy
ztko$wf3`Y*vHE_p_#7vx-E%09VXX{qlXx%;zVEVk)9g4c);~jvF>A1lUx@2{&8S$$
zZ=Wb&pa^Id+2x^17G~|{XxAt$6>Jw3qQ=?F&qCL~pSFyQLDb~5c7yp0R^OA+lO+Uc
z3C}Ffzb(EeHs2p3o~0K^Pc7p!JNsxP4N=LeJs7cBm@x}u9Xn%VM$S1H8!!skfir*u
z7z4(Ev%my!4tNbX4@?46z%=kWZ~<TeC-4T~0^Gnw;7z~-yal`syaRZFcL5G~510Wi
z0Y2a|Fblj7d;ok1%mMSj0&wM=<C-7pkSkz5$bu<s^|{h6$C1b7upGuXnb}jUa0Y)c
z_mlnnb?-z>^sGIaOaFdtV`hCzy;FExzy0UI-PwnV_C@vUZ}h*uGpF`_KH*_)hr7$F
zDRfyLI-FmQ`o10aJ+*uOLk6d<mpdJ1T=uqHRqG;aXV{5L)}u{)iAj5q^^hreWU?U&
z$uLgI?QvgwB^~g}_Tc}knDf^Or{r@upN?H%826mZ-V_Z*Y`cE6-RCqzQTMjxhTg3k
zZSJb~V@^}JJ9A#nG;LGAN<H^EL#(TaF}k{rh_a^Y%Jy-a$-LdciGo0`YiM#qQ8`2#
z-FmY>a5SVEjyPhks~u4hJ0imGbE<)GoEv0pZYw^&%pDv&Pv&?QoOx^!j$@e&+5c@e
zZ5+0k^msO;%7Zy1by%Uv8+^%p)c$Jc!$Q1Jn`t{Sk7jh17D?=JjR)o-iD^+((UvN9
z8ZD~2u1PJi)hD7i`(mrr=n=AAwbj-d2C)Vf7yJtW4r*|RQx7?V3xS0YcfhzN_-yv3
zaMkQf*C%6F$Q7u>c%I(`$aRC~7x_qqkMn$mk5;HH5;^AY5##E>pX8(FH|~^$`jb50
z<1r3TLJ}#G2fi_~jrhn%$-3SxCGv}fU}jfZ%XcJWUAw78B-nS7y1$)_rfO1fFCYow
zvM7XiN`<N}_mfM>cn4#{RY}N^_^u-CwbM~vE7f&fN@UAYJ>DtZLR=yokP<z@(<@?p
zv5+Y%YkH6RqN#vX*TJ9FlUlC=zJOGSgbSf;SWXaLUXy~UdnF;`H~j%!NyNdCttM-Y
zirP=<Qa!U<N}vXkQ#6&&uNFHuqp6j0{#fq&Bwh%$wLxCdR8B2K^ki&RJ2meUemN0`
zPVxQW+!P_)ECyG0Pz$LK8U%W3e#K9DF<;Wi_+_GRpZYr`<P8pgaiGiIN;*o`;7CVa
z%TsOmzFBXnSrYbk3c;87jYw6hXRC5xc+PxR6o@7{T9;(xJMx-7togpOmTF3M+4Ku}
ze^JO*mBi{`9m}_RMB8pr2<SvNx)<pByqJ$DY*L=n^ud1dXKJSZlsuY}u^G%nU#?97
zc_izZeo2V?rBIUO2d(y+MI%*}jUBo_P#fM;yNaoCuvAFxi^58m=3QI$r)zw7{Z<M(
z^V;(qOS)`ino@9)a;$5Md+FM`R^H?jq&1G>@Jf1;SII4Q)NefB+(5sQ&&?$5v^C-%
zv@Tvrt0nXu+-d)z(GWt6^or0ynHorzVOWi$iP4JGPhV*iUnIuEJqnt28o$Y7H29FG
zY$k_h9t_&doth7&j?<p1tRuo4LPP0PjLX{;TlczZXI^eL=jp-bJv_L)=8@*UhJOp@
Nk1624<_7-7{{wmo^I-r0

literal 0
HcmV?d00001

diff --git a/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.1.parquet b/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2dd74184f465f390ee48426b134445f1da072609
GIT binary patch
literal 3085
zcmcInOK%!i6dnx5B(WPO%21F{WsMx<h2z55ICko`888e(00TA*kfhGMfWb4v1O{Uq
zS$(XcDypLHx{0bN$|8%Xt1i3hqH3$E%c_g2Y8PGf4|G-a+&hNggh;KFj{MEzobR4{
z&Uf#*!a1f8CSBxD4)RAwn)H&yd4ka9-2_3f?N-;29jCQytE~&#B$#-?QraGPNvQRP
zr5bhD<aDU5Oo1iW=}Qe&axBcUZK2a~1U3pSX%BJZnj%?kaBZ#ST56|dph#?^)v-u|
zOp@tEQYL3fH|Y+n5(|@f3ZG{%2&nV-Cg3hRTxW@M)bb`(C@xDis>jZM<U2=wMm;$!
zQs58XcLvC5e}r()`hD)5zNyTG$qBFQz324abq4=(1U+Np`xl{lf}9}WHVY4efbWkT
z<XuqXHLON_XS8CZj9rG~w9l4k%5h=Mt{zpkZ7GW8cq5e1`XG=KES#i3@FkKSsdQ~&
z5)*x%fs{aey^Ln^5obPp|L@<N&Z*?5$)CUZa{s&|_^tc-;%HplqY3ZhiQxB>!AGvZ
z>X?{gtNYwdK6R!+b+p#^oi3U<<sg<%O}U7b84nQwtO70r)&T1>9@iDbcndK$!16ZW
z9l*PQs{jTN1;hZGfH)ulU;#-02S@>~0n&gBAPd+6<N$d<0k93&0Tclx01qewDu62B
zI^YK2Cg455F5nhGnDK0gARM|R`+-(dCL7NL_jv{$pNBr?c!`BER&df^+dIyE_Uhd9
zrVvOzTulGIk(<x#C^yTG^&5X2-CFoWRv$E;Jk|dE%A7{=g7;?vr0aP9O{g+no(CT9
z7n8xKXM(>@1^z<@<5ta|ju1X~M`|d#K)MNX`r^c3n_nc-0mxv)L=Q|ZU_mm3SM~?|
z8?UZMUg3uPe}(h@Fyobj9`7%wFA#)((dRY=Ll!!|?_75%)sU5gU8$vYb)!RFnY&J@
zGIeutj<QX=*soyE9m)`N1vo}m(|}P^6;0kfX|p5mT5$9Lk?I<%)RGkn*hW`3`$I=d
zY{G$v?7CtJvS0~7ze6bo(BZ^DV~ab9OKa58(X(VuW<lBK8;WrvlM(yB&87oKGfa9i
z8&u`dA|!QOp&gfg$$Z%UYUhK6c%e4i_S`(GVXdwp?n#Y@<`Ic;QCQKoB3P|9R$WuY
zw$Sb)(apZlZnt_U*uK*4s4W9o!z)Xnr7#6*P{&h`IO(PE(lT{K_@<dcVT-?Fx2Nxe
zQ*WXxT;F6ECJ#W@HHKMXqID+0Fm)zY$F^wngujQ3@u8n%V)i%OaTM(57^cU-IC!EU
z(F%INH;n86KYE$dwQiLyt(58PzL+doqLERz)TjvdJx&XCxLB$w(g$IYkJJP{vR5rP
zG^x+6atRB@Mj9etMEtJIA9T_&My=|aCbET^s3)xIHt^U;SY&%BPOk#vE9GoWPHH{u
zi>1P%u0i~Q#;LtJ#0!h%Xr#Pch)67o%cvrqx?Sb7A=@9;WHtdY3JtE=sw@4JChFP!
zDhoBBoMNd`DPFO*VyX37=|t{_h|e!~)L~w+R8c8MHEuJmj?Mcyzr-d$r^MlCZZaP+
zEA;vv)Pm{*4Z=O8v>w87VLr~th9snKAN#E;<P9-Gm7y*N>**L;gMp5WTEf~eeY@VO
zS>+G*%JfV8MzkU7g@zO!owL*xc%(@Y*CiSyOG;{^njgx^lqu?x?dMCO3SVf*Y<#$m
zwe23#wp-!D8qy8#1-w2l<|FWV9A}(9uwO#irtKe-hbbC)IvabrHf6|z)3g05p9qP|
z9Lf*0Ixs6nsv#MBcz-}`@SfV2ZH?(_Idv%T>s_38GagDenQmq~1v)e8vmC3MWMoZ|
zUcoUk>dHa7nNe$bhDEJ0;K3{ENk&1p%|X90jG2ReMW1;NcUltphph{*sMT0)5AL}C
zpwWPWjQEP;LD?n*Sp!2n0Zk08i2e8#vzQ7p9`8{IlfnEv1EV1x<SE(7K{F2r<%?tU
z!J!klr|KDC*h4TCPeu6VOrd?dt5}yL)4YTawz=bjYtBB>{2TD2!Tuw5;df^a{t@^G
DkXjW|

literal 0
HcmV?d00001

diff --git a/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.2.parquet b/tests/component/examples/data/test_pipeline/test_pipeline_12345/component_2/part.2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1ae8001c02ac7c803353728bb9e7bb919d774ecc
GIT binary patch
literal 3066
zcmcImOKcle6dgN`;}l0NsAFtoiO6cLF4AB-acHV8I(A~muHz<d;;{`?n4dl4jK@=d
z;y58t%BN5YlwV<0RV)zdf(1(|RYhV2NNf?^Ah7_6JqyHw^WG$`lco|#W#luTd+)pN
z-1FXj@&cC&vkvx88~eL0!#>3_PcY2pl8a%OSzVD@4NV%vd$J)ko6f;ucfD=Mwx|6G
zt)jLxqhj+M&ej_>LAM<^yinJgb;CAM;Dwgb7{aK9W>-=*X#lUWcEvVyBqb>hd?d8S
zpldQ$msXwlnrR4?y1_E+9Ge+qRrV0;WS#yg=GY)j<Q>3RgdMqR$6d8K4l<7iCYFxI
zxN^CCdH9N4o^hsc-##<^-47jx3Emj+vu<CMaUSw{ol9L)84a^`kLtNO;JH2!yk-x&
z_NMlYp?-$7Gq@eXgJJNkvqpw>Q#Gc{zFG)<LY#<_5VS_uX&YY9yO4rZiTa3x@<z6C
zHquv6Y^R_>IuLPB7KqLV-QHimx!bW144t-*NcW6;sN$A$Zxs<*#osp1C42Cz!QdAT
z|J2@u`xbK7$=(^rKvuG_HwPRtGi+m&;UNc89&s@hKm|0Q3akP;Py-C04w%3NpaC>T
zT#gpW&?XtyVCewsKo{5mHh~v`E#M{KW#A(43UCQ{6?hGJ9e4w{47>@v1zZ8%2Cf3{
z0Ph0t0oQ={fe(NWfscTXM_i|_Lp^eRVm-(L%B*@{a<6O0<8{f~F&^gFK2|s*-&*_2
zdGPe8dq(ijeKwZ)_0+<V`6cCi;q%(_ziqv6>|I&Aq2BsZ|KpB1_9Mmw|8o0T$M$Zs
zYcgG<J1)=N!QiKl1-~Bh{|5&9t)4FvWxUR&q$)Lmbuz5`sC{QsA7wIrFz7Qyc1#XL
zfehnOeSY7mN750GYD50Nf_Z)(@u)$U=cfaQ8OAr}b((@93r+9$j?I8($jbVPRM*=z
zqZv3k`dmPh1Lwy^1D0up`W5Qg3>ZR9L5$JXbwrdkMVD80+pNqhEgUci473eRs>@0M
z(MG#wc6*MxSj8zy>@}q&$U;j%_+~&c5ROBGj9p$z96u4*+PY6>H;aIEijf?<nDp8I
zEt@9Jb4<FM4XJW#45YRzwDLwCG9R@++WD{$57cJa_ROOht*J>8yIbR)xldwR6jijL
z2(5a9s;+BdLuhn~=w?@FH0m9aY)xr2wYov9;mPsPcsKwx0^3vfIV0oY@rl3|<8^bn
z+*$sl)t26i!_SZ_T$$lGZV@2Y8IGIeVihjIaTP9Jp|)6Tm%l@dvps)`i(B8g(=61V
z;<yfnad?s>u_Afk8zY;Dk3E~J>FrW7KUs)m*TlJeOEl)Svsz4qeKn<rnyGlYDn{1B
zA|EXad~~%`P<5%Bno1>F7#meZewoC#Wq!SxiE~=1rt4xdR~BoDR_Pq#lF_i3?2tU2
zBE}aB*|I#Rcc?F(4vRG%`Ez<o>r{|0EEZzX!bC1AB}raR6C>#hB|aOn{9#>ACXgei
zrmFRd(oO4PExT4qq6We#p3diIi><Tq^mIADi~9zN=O>z4k5@dstQ2B;YGzj3m-imO
zluSUU#6~|inU9*q$n+{|A@xCna7W2chiG2Rmol;;iRjy<{#FURkt0;>>9RhZiIX+h
z(UH^gR2!~q)mt)4{Q7Dk@({lfQ^i_NmBRgV=Gy{KG+Cx~iAKI9&FTG`Z^(0LQ>;mr
zpU;Pid`^{<v%PgJpX(59+eJRC6W!=upzHG>AAw(_dG^x>`z4gETK;`_n4+;5$;Kb9
zO#wVowQRS<Cqm*xitvM0>t@kNtCF!w_XldjdwNZ_G>()C=?#IOZd2aX*-)m+wdc>J
zp);r5=UCDuBWsG0Nt$C`n_SOS=e6=8mn5yR6NgvQlbk|sGdumpapnU0m3%IyXs68)
zf3J1%N?I+cui{So4~>QpVx(7$4$4-M<OB?}2{bWUk^1Q?jpB;LxV=Y_W}e0`au|(#
z;3-+;(9FF-i_81+p{WzJrz-P^u!hi3Iu+v`HHF57w$eH-ndWhNu#IjXT%*>J<~xNy
P3)YX}@uzbE|N8#{@S7Po

literal 0
HcmV?d00001

diff --git a/tests/component/examples/mock_base_path/example_pipeline/cache/42.txt b/tests/component/examples/mock_base_path/example_pipeline/cache/42.txt
new file mode 100644
index 000000000..4a9ff8afc
--- /dev/null
+++ b/tests/component/examples/mock_base_path/example_pipeline/cache/42.txt
@@ -0,0 +1 @@
+tests/component/examples/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
\ No newline at end of file
diff --git a/tests/component/examples/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json b/tests/component/examples/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
new file mode 100644
index 000000000..47c2fe949
--- /dev/null
+++ b/tests/component/examples/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
@@ -0,0 +1,31 @@
+{
+  "metadata": {
+    "pipeline_name": "example_pipeline",
+    "base_path": "tests/example_data/subsets_input/mock_base_path",
+    "run_id": "example_pipeline_2023",
+    "component_id": "component_1",
+    "cache_key": "42"
+  },
+  "index": {
+    "location": "/component_1"
+  },
+  "fields":
+  {
+    "data": {
+      "type": "binary",
+      "location": "/component_1"
+    },
+    "height": {
+      "type": "int32",
+      "location": "/component_1"
+    },
+    "width": {
+      "type": "int32",
+      "location": "/component_1"
+    },
+    "captions": {
+      "type": "string",
+      "location": "/component_1"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_component.py b/tests/component/test_component.py
similarity index 99%
rename from tests/test_component.py
rename to tests/component/test_component.py
index e5dcb3bc3..830ce2963 100644
--- a/tests/test_component.py
+++ b/tests/component/test_component.py
@@ -23,8 +23,8 @@
 from fondant.core.component_spec import ComponentSpec
 from fondant.core.manifest import Manifest, Metadata
 
-components_path = Path(__file__).parent / "example_specs/components"
-base_path = Path(__file__).parent / "example_specs/mock_base_path"
+components_path = Path(__file__).parent / "examples/component_specs"
+base_path = Path(__file__).parent / "examples/mock_base_path"
 
 N_PARTITIONS = 2
 
diff --git a/tests/test_data_io.py b/tests/component/test_data_io.py
similarity index 61%
rename from tests/test_data_io.py
rename to tests/component/test_data_io.py
index 9ade4a329..30a4b7c10 100644
--- a/tests/test_data_io.py
+++ b/tests/component/test_data_io.py
@@ -8,8 +8,10 @@
 from fondant.core.component_spec import ComponentSpec
 from fondant.core.manifest import Manifest
 
-manifest_path = Path(__file__).parent / "example_data/manifest.json"
-component_spec_path = Path(__file__).parent / "example_data/components/1.yaml"
+manifest_path = Path(__file__).parent / "examples/data/manifest.json"
+component_spec_path = (
+    Path(__file__).parent / "examples/data/components/1.yaml"
+)
 
 NUMBER_OF_TEST_ROWS = 151
 
@@ -37,33 +39,16 @@ def dataframe(manifest, component_spec):
     return data_loader.load_dataframe()
 
 
-def test_load_index(manifest, component_spec):
-    """Test the loading of just the index."""
-    data_loader = DaskDataLoader(manifest=manifest, component_spec=component_spec)
-    index_df = data_loader._load_index()
-    assert len(index_df) == NUMBER_OF_TEST_ROWS
-    assert index_df.index.name == "id"
-
-
-def test_load_subset(manifest, component_spec):
-    """Test the loading of one field of a subset."""
-    data_loader = DaskDataLoader(manifest=manifest, component_spec=component_spec)
-    subset_df = data_loader._load_subset(subset_name="types", fields=["Type 1"])
-    assert len(subset_df) == NUMBER_OF_TEST_ROWS
-    assert list(subset_df.columns) == ["types_Type 1"]
-    assert subset_df.index.name == "id"
-
-
 def test_load_dataframe(manifest, component_spec):
-    """Test merging of subsets in a dataframe based on a component_spec."""
+    """Test merging of fields in a dataframe based on a component_spec."""
     dl = DaskDataLoader(manifest=manifest, component_spec=component_spec)
     dataframe = dl.load_dataframe()
     assert len(dataframe) == NUMBER_OF_TEST_ROWS
     assert list(dataframe.columns) == [
-        "properties_Name",
-        "properties_HP",
-        "types_Type 1",
-        "types_Type 2",
+        "Name",
+        "HP",
+        "Type 1",
+        "Type 2",
     ]
     assert dataframe.index.name == "id"
 
@@ -78,7 +63,7 @@ def test_load_dataframe_default(manifest, component_spec):
 
 
 def test_load_dataframe_rows(manifest, component_spec):
-    """Test merging of subsets in a dataframe based on a component_spec."""
+    """Test merging of fields in a dataframe based on a component_spec."""
     dl = DaskDataLoader(
         manifest=manifest,
         component_spec=component_spec,
@@ -89,34 +74,7 @@ def test_load_dataframe_rows(manifest, component_spec):
     assert dataframe.npartitions == expected_partitions
 
 
-def test_write_index(
-    tmp_path_factory,
-    dataframe,
-    manifest,
-    component_spec,
-    dask_client,
-):
-    """Test writing out the index."""
-    with tmp_path_factory.mktemp("temp") as fn:
-        # override the base path of the manifest with the temp dir
-        manifest.update_metadata("base_path", str(fn))
-        data_writer = DaskDataWriter(
-            manifest=manifest,
-            component_spec=component_spec,
-        )
-        # write out index to temp dir
-        data_writer.write_dataframe(dataframe, dask_client)
-        number_workers = os.cpu_count()
-        # read written data and assert
-        dataframe = dd.read_parquet(fn / "index")
-        assert len(dataframe) == NUMBER_OF_TEST_ROWS
-        assert dataframe.index.name == "id"
-        assert dataframe.npartitions in list(
-            range(number_workers - 1, number_workers + 2),
-        )
-
-
-def test_write_subsets(
+def test_write_dataset(
     tmp_path_factory,
     dataframe,
     manifest,
@@ -125,11 +83,7 @@ def test_write_subsets(
 ):
     """Test writing out subsets."""
     # Dictionary specifying the expected subsets to write and their column names
-    subset_columns_dict = {
-        "index": [],
-        "properties": ["Name", "HP"],
-        "types": ["Type 1", "Type 2"],
-    }
+    columns = ["Name", "HP", "Type 1", "Type 2"]
     with tmp_path_factory.mktemp("temp") as fn:
         # override the base path of the manifest with the temp dir
         manifest.update_metadata("base_path", str(fn))
@@ -137,13 +91,13 @@ def test_write_subsets(
         # write dataframe to temp dir
         data_writer.write_dataframe(dataframe, dask_client)
         # read written data and assert
-        for subset, subset_columns in subset_columns_dict.items():
-            dataframe = dd.read_parquet(fn / subset)
-            assert len(dataframe) == NUMBER_OF_TEST_ROWS
-            assert list(dataframe.columns) == subset_columns
-            assert dataframe.index.name == "id"
+        dataframe = dd.read_parquet(fn)
+        assert len(dataframe) == NUMBER_OF_TEST_ROWS
+        assert list(dataframe.columns) == columns
+        assert dataframe.index.name == "id"
 
 
+# TODO: check if this is still needed?
 def test_write_reset_index(
     tmp_path_factory,
     dataframe,
@@ -151,7 +105,7 @@ def test_write_reset_index(
     component_spec,
     dask_client,
 ):
-    """Test writing out the index and subsets that have no dask index and checking
+    """Test writing out the index and fields that have no dask index and checking
     if the id index was created.
     """
     dataframe = dataframe.reset_index(drop=True)
@@ -160,10 +114,8 @@ def test_write_reset_index(
 
         data_writer = DaskDataWriter(manifest=manifest, component_spec=component_spec)
         data_writer.write_dataframe(dataframe, dask_client)
-
-        for subset in ["properties", "types", "index"]:
-            dataframe = dd.read_parquet(fn / subset)
-            assert dataframe.index.name == "id"
+        dataframe = dd.read_parquet(fn)
+        assert dataframe.index.name == "id"
 
 
 @pytest.mark.parametrize("partitions", list(range(1, 5)))
@@ -189,29 +141,51 @@ def test_write_divisions(  # noqa: PLR0913
 
         data_writer.write_dataframe(dataframe, dask_client)
 
-        for target in ["properties", "types", "index"]:
-            dataframe = dd.read_parquet(fn / target)
-            assert dataframe.index.name == "id"
-            assert dataframe.npartitions == partitions
+        dataframe = dd.read_parquet(fn)
+        assert dataframe.index.name == "id"
+        assert dataframe.npartitions == partitions
+
+
+def test_write_fields_invalid(
+    tmp_path_factory,
+    dataframe,
+    manifest,
+    component_spec,
+    dask_client,
+):
+    """Test writing out fields but the dataframe columns are incomplete."""
+    with tmp_path_factory.mktemp("temp") as fn:
+        # override the base path of the manifest with the temp dir
+        manifest.update_metadata("base_path", str(fn))
+        # Drop one of the columns required in the output
+        dataframe = dataframe.drop(["Type 2"], axis=1)
+        data_writer = DaskDataWriter(manifest=manifest, component_spec=component_spec)
+        expected_error_msg = (
+            r"Fields \['Type 2'\] defined in output dataset "
+            r"but not found in dataframe"
+        )
+        with pytest.raises(ValueError, match=expected_error_msg):
+            data_writer.write_dataframe(dataframe, dask_client)
 
 
-def test_write_subsets_invalid(
+def test_write_fields_invalid_several_fields_missing(
     tmp_path_factory,
     dataframe,
     manifest,
     component_spec,
     dask_client,
 ):
-    """Test writing out subsets but the dataframe columns are incomplete."""
+    """Test writing out fields but several dataframe columns are missing."""
     with tmp_path_factory.mktemp("temp") as fn:
         # override the base path of the manifest with the temp dir
         manifest.update_metadata("base_path", str(fn))
         # Drop one of the columns required in the output
-        dataframe = dataframe.drop(["types_Type 2"], axis=1)
+        dataframe = dataframe.drop(["Type 1"], axis=1)
+        dataframe = dataframe.drop(["Type 2"], axis=1)
         data_writer = DaskDataWriter(manifest=manifest, component_spec=component_spec)
         expected_error_msg = (
-            r"Field \['types_Type 2'\] not in index defined in output subset "
-            r"types but not found in dataframe"
+            r"Fields \['Type 1', 'Type 2'\] defined in output dataset "
+            r"but not found in dataframe"
         )
         with pytest.raises(ValueError, match=expected_error_msg):
             data_writer.write_dataframe(dataframe, dask_client)
diff --git a/tests/core/test_manifest.py b/tests/core/test_manifest.py
index 0b255b9df..c24d27c9c 100644
--- a/tests/core/test_manifest.py
+++ b/tests/core/test_manifest.py
@@ -1,6 +1,5 @@
 import json
 import pkgutil
-from collections import OrderedDict
 from pathlib import Path
 
 import pytest
@@ -226,21 +225,3 @@ def test_fields():
     # delete a field
     manifest.remove_field(name="field_1")
     assert "field_1" not in manifest.fields
-
-
-def test_field_mapping(valid_manifest):
-    """Test field mapping generation."""
-    manifest = Manifest(valid_manifest)
-    manifest.add_or_update_field(Field(name="index", location="component2"))
-    field_mapping = manifest.field_mapping
-    assert field_mapping == OrderedDict(
-        {
-            "gs://bucket/test_pipeline/test_pipeline_12345/component2": [
-                "id",
-                "height",
-                "width",
-            ],
-            "gs://bucket/test_pipeline/test_pipeline_12345/component1": ["images"],
-            "gs://bucket/test_pipeline/test_pipeline_12345/component3": ["caption"],
-        },
-    )
diff --git a/tests/examples/example_data/raw/split.py b/tests/examples/example_data/raw/split.py
index 6800ee323..ade466125 100644
--- a/tests/examples/example_data/raw/split.py
+++ b/tests/examples/example_data/raw/split.py
@@ -13,7 +13,7 @@
 import dask.dataframe as dd
 
 data_path = Path(__file__).parent
-output_path = Path(__file__).parent.parent / "subsets_input/"
+output_path = Path(__file__).parent.parent
 
 
 def split_into_subsets():
@@ -22,17 +22,13 @@ def split_into_subsets():
     master_df = master_df.set_index("id", sorted=True)
     master_df = master_df.repartition(divisions=[0, 50, 100, 151], force=True)
 
-    # create index subset
-    index_df = master_df.index.to_frame().drop(columns=["id"])
-    index_df.to_parquet(output_path / "index")
-
     # create properties subset
     properties_df = master_df[["Name", "HP"]]
-    properties_df.to_parquet(output_path / "properties")
+    properties_df.to_parquet(output_path / "component_1")
 
     # create types subset
     types_df = master_df[["Type 1", "Type 2"]]
-    types_df.to_parquet(output_path / "types")
+    types_df.to_parquet(output_path / "component_2")
 
 
 if __name__ == "__main__":

From e4eadf3ddb59925dd2d86c66abf2d70513ce3eb7 Mon Sep 17 00:00:00 2001
From: Matthias Richter <matthias.r1092@gmail.com>
Date: Fri, 24 Nov 2023 08:46:50 +0100
Subject: [PATCH 3/4] Use new data format (#667)

This PR applies the new data format across the codebase:

- fixes all tests
- updates component specifications and component code
- removes subset field usage in `pipeline.py`

---------

Co-authored-by: Robbe Sneyders <robbe.sneyders@gmail.com>
---
 components/caption_images/README.md           |   6 +-
 .../caption_images/fondant_component.yaml     |  12 +--
 components/caption_images/src/main.py         |   4 +-
 components/chunk_text/README.md               |   8 +-
 components/chunk_text/fondant_component.yaml  |  16 ++--
 components/chunk_text/src/main.py             |   7 +-
 .../chunk_text/tests/chunk_text_test.py       |   6 +-
 components/download_images/README.md          |  10 +--
 .../download_images/fondant_component.yaml    |  24 +++---
 components/download_images/src/main.py        |   5 +-
 .../download_images/tests/test_component.py   |   8 +-
 components/embed_images/README.md             |   6 +-
 .../embed_images/fondant_component.yaml       |  18 ++---
 components/embed_images/src/main.py           |   4 +-
 components/embed_text/README.md               |   8 +-
 components/embed_text/fondant_component.yaml  |  22 +++---
 components/embed_text/src/main.py             |   4 +-
 .../embedding_based_laion_retrieval/README.md |   6 +-
 .../fondant_component.yaml                    |  18 ++---
 .../src/main.py                               |   6 +-
 components/filter_image_resolution/README.md  |   5 +-
 .../fondant_component.yaml                    |  10 +--
 .../filter_image_resolution/src/main.py       |   4 +-
 components/filter_text_length/README.md       |   3 +-
 .../filter_text_length/fondant_component.yaml |   6 +-
 components/filter_text_length/src/main.py     |   4 +-
 .../tests/text_length_filter_test.py          |   2 +-
 components/image_cropping/README.md           |  10 +--
 .../image_cropping/fondant_component.yaml     |  20 ++---
 components/image_cropping/src/main.py         |   4 +-
 .../image_resolution_extraction/README.md     |  10 +--
 .../fondant_component.yaml                    |  20 ++---
 .../image_resolution_extraction/src/main.py   |   7 +-
 components/index_weaviate/README.md           |   5 +-
 .../index_weaviate/fondant_component.yaml     |  14 ++--
 components/language_filter/README.md          |   3 +-
 .../language_filter/fondant_component.yaml    |   6 +-
 components/language_filter/src/main.py        |   2 +-
 components/load_from_files/README.md          |   5 +-
 .../load_from_files/fondant_component.yaml    |  12 ++-
 components/load_from_hf_hub/README.md         |   3 +-
 .../load_from_hf_hub/fondant_component.yaml   |  10 +--
 components/load_from_hf_hub/src/main.py       |  25 +++----
 components/load_from_parquet/README.md        |   3 +-
 .../load_from_parquet/fondant_component.yaml  |   6 +-
 components/load_from_parquet/src/main.py      |  25 +++----
 components/minhash_generator/README.md        |   6 +-
 .../minhash_generator/fondant_component.yaml  |  16 ++--
 components/minhash_generator/src/main.py      |   4 +-
 components/normalize_text/README.md           |   3 +-
 .../normalize_text/fondant_component.yaml     |   6 +-
 components/normalize_text/src/main.py         |  12 +--
 .../prompt_based_laion_retrieval/README.md    |   6 +-
 .../fondant_component.yaml                    |  14 ++--
 .../prompt_based_laion_retrieval/src/main.py  |   6 +-
 components/resize_images/README.md            |   6 +-
 .../resize_images/fondant_component.yaml      |  12 +--
 components/resize_images/src/main.py          |   2 +-
 components/segment_images/README.md           |   6 +-
 .../segment_images/fondant_component.yaml     |  12 +--
 components/segment_images/src/main.py         |   2 +-
 components/write_to_hf_hub/README.md          |   3 +-
 .../write_to_hf_hub/fondant_component.yaml    |   8 +-
 components/write_to_hf_hub/src/main.py        |  19 +++--
 scripts/component_readme/readme_template.md   |  14 +---
 src/fondant/core/manifest.py                  |   8 +-
 src/fondant/core/schema.py                    |   3 +-
 src/fondant/pipeline/pipeline.py              |  56 ++++++--------
 tests/component/test_data_io.py               |   4 +-
 tests/examples/example_data/components/1.yaml |  35 ---------
 tests/examples/example_data/manifest.json     |  35 ---------
 tests/examples/example_data/raw/split.py      |  35 ---------
 .../examples/example_data/raw/testset.parquet | Bin 15048 -> 0 bytes
 .../subsets_input/index/part.0.parquet        | Bin 1701 -> 0 bytes
 .../subsets_input/index/part.1.parquet        | Bin 1707 -> 0 bytes
 .../subsets_input/index/part.2.parquet        | Bin 1715 -> 0 bytes
 .../subsets_input/properties/part.0.parquet   | Bin 3542 -> 0 bytes
 .../subsets_input/properties/part.1.parquet   | Bin 3526 -> 0 bytes
 .../subsets_input/properties/part.2.parquet   | Bin 3584 -> 0 bytes
 .../subsets_input/types/part.0.parquet        | Bin 3018 -> 0 bytes
 .../subsets_input/types/part.1.parquet        | Bin 3085 -> 0 bytes
 .../subsets_input/types/part.2.parquet        | Bin 3066 -> 0 bytes
 .../second_component/fondant_component.yaml   |  27 -------
 .../second_component/fondant_component.yaml   |  29 --------
 .../second_component/fondant_component.yaml   |  27 -------
 .../fourth_component/fondant_component.yaml   |  38 ----------
 .../third_component/fondant_component.yaml    |  33 ---------
 .../components/arguments/component.yaml       |  68 -----------------
 .../arguments/component_default_args.yaml     |  69 ------------------
 .../components/arguments/input_manifest.json  |  18 -----
 .../components/input_manifest.json            |  17 -----
 .../example_pipeline/cache/42.txt             |   1 -
 .../component_1/manifest.json                 |  31 --------
 .../component_2/manifest.json                 |  36 ---------
 .../component_1/manifest.json                 |  36 ---------
 .../component_2/manifest.json                 |  36 ---------
 .../components/dummy_component/Dockerfile     |   0
 .../components/dummy_component/README.md      |   0
 .../dummy_component/fondant_component.yaml    |   8 +-
 .../dummy_component/requirements.txt          |   0
 .../components/dummy_component/src/main.py    |   0
 .../load_from_parquet/fondant_component.yaml  |   4 +-
 .../sample_pipeline_test/data/sample.parquet  | Bin
 .../test_sample_pipeline.py                   |   6 +-
 .../compiled_pipeline/kubeflow_pipeline.yml   |   0
 .../first_component/fondant_component.yaml    |  12 +--
 .../second_component/fondant_component.yaml   |  19 +++--
 .../first_component/fondant_component.yaml    |  17 ++---
 .../second_component/fondant_component.yaml   |  24 ++++++
 .../first_component/fondant_component.yaml    |  18 ++---
 .../second_component/fondant_component.yaml   |  21 ++++++
 .../example_1/first_component/Dockerfile      |   0
 .../first_component/fondant_component.yaml    |  19 ++---
 .../example_1/fourth_component/Dockerfile     |   0
 .../fourth_component/fondant_component.yaml   |  29 ++++++++
 .../example_1/second_component/Dockerfile     |   0
 .../second_component/fondant_component.yaml   |  18 +++++
 .../example_1/third_component/Dockerfile      |   0
 .../third_component/fondant_component.yaml}   |  19 ++---
 tests/{ => pipeline}/test_compiler.py         |   4 +-
 tests/{ => pipeline}/test_pipeline.py         |   4 +-
 tests/{ => pipeline}/test_runner.py           |   2 +-
 tests/test_cli.py                             |  32 ++++----
 tox.ini                                       |   2 +-
 124 files changed, 420 insertions(+), 1059 deletions(-)
 delete mode 100644 tests/examples/example_data/components/1.yaml
 delete mode 100644 tests/examples/example_data/manifest.json
 delete mode 100644 tests/examples/example_data/raw/split.py
 delete mode 100644 tests/examples/example_data/raw/testset.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/index/part.0.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/index/part.1.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/index/part.2.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/properties/part.0.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/properties/part.1.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/properties/part.2.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/types/part.0.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/types/part.1.parquet
 delete mode 100644 tests/examples/example_data/subsets_input/types/part.2.parquet
 delete mode 100644 tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
 delete mode 100644 tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
 delete mode 100644 tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
 delete mode 100644 tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
 delete mode 100644 tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
 delete mode 100644 tests/examples/example_specs/components/arguments/component.yaml
 delete mode 100644 tests/examples/example_specs/components/arguments/component_default_args.yaml
 delete mode 100644 tests/examples/example_specs/components/arguments/input_manifest.json
 delete mode 100644 tests/examples/example_specs/components/input_manifest.json
 delete mode 100644 tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt
 delete mode 100644 tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
 delete mode 100644 tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json
 delete mode 100644 tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json
 delete mode 100644 tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json
 rename tests/{ => integration_tests}/sample_pipeline_test/components/dummy_component/Dockerfile (100%)
 rename tests/{ => integration_tests}/sample_pipeline_test/components/dummy_component/README.md (100%)
 rename tests/{ => integration_tests}/sample_pipeline_test/components/dummy_component/fondant_component.yaml (73%)
 rename tests/{ => integration_tests}/sample_pipeline_test/components/dummy_component/requirements.txt (100%)
 rename tests/{ => integration_tests}/sample_pipeline_test/components/dummy_component/src/main.py (100%)
 rename tests/{ => integration_tests}/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml (95%)
 rename tests/{ => integration_tests}/sample_pipeline_test/data/sample.parquet (100%)
 rename tests/{ => integration_tests}/test_sample_pipeline.py (91%)
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/compiled_pipeline/kubeflow_pipeline.yml (100%)
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/invalid_pipeline/example_1/first_component/fondant_component.yaml (62%)
 rename tests/{examples/example_pipelines/valid_pipeline => pipeline/examples/pipelines/invalid_pipeline}/example_1/second_component/fondant_component.yaml (55%)
 rename tests/{examples/example_pipelines/valid_pipeline/example_1 => pipeline/examples/pipelines/invalid_pipeline/example_2}/first_component/fondant_component.yaml (61%)
 create mode 100644 tests/pipeline/examples/pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/invalid_pipeline/example_3/first_component/fondant_component.yaml (53%)
 create mode 100644 tests/pipeline/examples/pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/valid_pipeline/example_1/first_component/Dockerfile (100%)
 rename tests/{examples/example_pipelines/invalid_pipeline/example_2 => pipeline/examples/pipelines/valid_pipeline/example_1}/first_component/fondant_component.yaml (50%)
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/valid_pipeline/example_1/fourth_component/Dockerfile (100%)
 create mode 100644 tests/pipeline/examples/pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/valid_pipeline/example_1/second_component/Dockerfile (100%)
 create mode 100644 tests/pipeline/examples/pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
 rename tests/{examples/example_pipelines => pipeline/examples/pipelines}/valid_pipeline/example_1/third_component/Dockerfile (100%)
 rename tests/{examples/example_specs/components/component.yaml => pipeline/examples/pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml} (59%)
 rename tests/{ => pipeline}/test_compiler.py (99%)
 rename tests/{ => pipeline}/test_pipeline.py (98%)
 rename tests/{ => pipeline}/test_runner.py (98%)

diff --git a/components/caption_images/README.md b/components/caption_images/README.md
index 8bb38e996..401747cbb 100644
--- a/components/caption_images/README.md
+++ b/components/caption_images/README.md
@@ -7,13 +7,11 @@ This component captions images using a BLIP model from the Hugging Face hub
 
 **This component consumes:**
 
-- images
-    - data: binary
+- images_data: binary
 
 **This component produces:**
 
-- captions
-    - text: string
+- captions_text: string
 
 ### Arguments
 
diff --git a/components/caption_images/fondant_component.yaml b/components/caption_images/fondant_component.yaml
index 7a72cd815..3da8e4720 100644
--- a/components/caption_images/fondant_component.yaml
+++ b/components/caption_images/fondant_component.yaml
@@ -5,16 +5,12 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  captions:
-    fields:
-      text:
-        type: utf8
+  captions_text:
+    type: utf8
 
 args:
   model_id:
diff --git a/components/caption_images/src/main.py b/components/caption_images/src/main.py
index 934ea09ce..86be52b40 100644
--- a/components/caption_images/src/main.py
+++ b/components/caption_images/src/main.py
@@ -90,7 +90,7 @@ def __init__(
         self.max_new_tokens = max_new_tokens
 
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
-        images = dataframe["images"]["data"]
+        images = dataframe["images_data"]
 
         results: t.List[pd.Series] = []
         for batch in np.split(
@@ -112,4 +112,4 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
                 ).T
                 results.append(captions)
 
-        return pd.concat(results).to_frame(name=("captions", "text"))
+        return pd.concat(results).to_frame(name=("captions_text"))
diff --git a/components/chunk_text/README.md b/components/chunk_text/README.md
index 97b3309e0..a12d74980 100644
--- a/components/chunk_text/README.md
+++ b/components/chunk_text/README.md
@@ -11,14 +11,12 @@ consists of the id of the original document followed by the chunk index.
 
 **This component consumes:**
 
-- text
-    - data: string
+- text_data: string
 
 **This component produces:**
 
-- text
-    - data: string
-    - original_document_id: string
+- text_data: string
+- text_original_document_id: string
 
 ### Arguments
 
diff --git a/components/chunk_text/fondant_component.yaml b/components/chunk_text/fondant_component.yaml
index d266b4dac..159e67556 100644
--- a/components/chunk_text/fondant_component.yaml
+++ b/components/chunk_text/fondant_component.yaml
@@ -10,18 +10,14 @@ tags:
   - Text processing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
+  text_data:
+    type: string
 
 produces:
-  text:
-    fields:
-      data:
-        type: string
-      original_document_id:
-        type: string
+  text_data:
+    type: string
+  text_original_document_id:
+    type: string
 
 args:
   chunk_size:
diff --git a/components/chunk_text/src/main.py b/components/chunk_text/src/main.py
index 8c41220d2..da46cbbd7 100644
--- a/components/chunk_text/src/main.py
+++ b/components/chunk_text/src/main.py
@@ -38,7 +38,7 @@ def __init__(
     def chunk_text(self, row) -> t.List[t.Tuple]:
         # Multi-index df has id under the name attribute
         doc_id = row.name
-        text_data = row[("text", "data")]
+        text_data = row[("text_data")]
         docs = self.text_splitter.create_documents([text_data])
         return [
             (doc_id, f"{doc_id}_{chunk_id}", chunk.page_content)
@@ -63,9 +63,4 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         )
         results_df = results_df.set_index("id")
 
-        # Set multi-index column for the expected subset and field
-        results_df.columns = pd.MultiIndex.from_product(
-            [["text"], results_df.columns],
-        )
-
         return results_df
diff --git a/components/chunk_text/tests/chunk_text_test.py b/components/chunk_text/tests/chunk_text_test.py
index a47683ed3..f95180f98 100644
--- a/components/chunk_text/tests/chunk_text_test.py
+++ b/components/chunk_text/tests/chunk_text_test.py
@@ -7,7 +7,7 @@ def test_transform():
     """Test chunk component method."""
     input_dataframe = pd.DataFrame(
         {
-            ("text", "data"): [
+            ("text_data"): [
                 "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo",
                 "ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis",
                 "parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec,",
@@ -25,8 +25,8 @@ def test_transform():
 
     expected_output_dataframe = pd.DataFrame(
         {
-            ("text", "original_document_id"): ["a", "a", "a", "b", "b", "c", "c"],
-            ("text", "data"): [
+            ("text_original_document_id"): ["a", "a", "a", "b", "b", "c", "c"],
+            ("text_data"): [
                 "Lorem ipsum dolor sit amet, consectetuer",
                 "amet, consectetuer adipiscing elit. Aenean",
                 "elit. Aenean commodo",
diff --git a/components/download_images/README.md b/components/download_images/README.md
index b491007b5..6ed54d66d 100644
--- a/components/download_images/README.md
+++ b/components/download_images/README.md
@@ -14,15 +14,13 @@ from the img2dataset library.
 
 **This component consumes:**
 
-- images
-    - url: string
+- images_url: string
 
 **This component produces:**
 
-- images
-    - data: binary
-    - width: int32
-    - height: int32
+- images_data: binary
+- images_width: int32
+- images_height: int32
 
 ### Arguments
 
diff --git a/components/download_images/fondant_component.yaml b/components/download_images/fondant_component.yaml
index 1982a96ba..abe19c653 100644
--- a/components/download_images/fondant_component.yaml
+++ b/components/download_images/fondant_component.yaml
@@ -13,21 +13,17 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      url:
-        type: string
+  images_url:
+    type: string
 
 produces:
-  images:
-    fields:
-      data:
-        type: binary
-      width:
-        type: int32
-      height:
-        type: int32
-    additionalFields: false
+  images_data:
+    type: binary
+  images_width:
+    type: int32
+  images_height:
+    type: int32
+#    additionalFields: false
 
 args:
   timeout:
@@ -53,7 +49,7 @@ args:
     description: Resize mode to use. One of "no", "keep_ratio", "center_crop", "border".
     type: str
     default: 'border'
-  resize_only_if_bigger: 
+  resize_only_if_bigger:
     description: If True, resize only if image is bigger than image_size.
     type: bool
     default: False
diff --git a/components/download_images/src/main.py b/components/download_images/src/main.py
index 8a37b86eb..070859e07 100644
--- a/components/download_images/src/main.py
+++ b/components/download_images/src/main.py
@@ -119,7 +119,7 @@ async def download_dataframe() -> None:
             images = await asyncio.gather(
                 *[
                     self.download_and_resize_image(id_, url, semaphore=semaphore)
-                    for id_, url in zip(dataframe.index, dataframe["images"]["url"])
+                    for id_, url in zip(dataframe.index, dataframe["images_url"])
                 ],
             )
             results.extend(images)
@@ -134,8 +134,5 @@ async def download_dataframe() -> None:
 
         results_df = results_df.dropna()
         results_df = results_df.set_index("id", drop=True)
-        results_df.columns = pd.MultiIndex.from_product(
-            [["images"], results_df.columns],
-        )
 
         return results_df
diff --git a/components/download_images/tests/test_component.py b/components/download_images/tests/test_component.py
index 1f690e6e5..d851ecd73 100644
--- a/components/download_images/tests/test_component.py
+++ b/components/download_images/tests/test_component.py
@@ -45,7 +45,7 @@ def test_transform(respx_mock):
 
     input_dataframe = pd.DataFrame(
         {
-            ("images", "url"): urls,
+            "images_url": urls,
         },
         index=pd.Index(ids, name="id"),
     )
@@ -55,9 +55,9 @@ def test_transform(respx_mock):
     resized_images = [component.resizer(io.BytesIO(image))[0] for image in images]
     expected_dataframe = pd.DataFrame(
         {
-            ("images", "data"): resized_images,
-            ("images", "width"): [image_size] * len(ids),
-            ("images", "height"): [image_size] * len(ids),
+            "images_data": resized_images,
+            "images_width": [image_size] * len(ids),
+            "images_height": [image_size] * len(ids),
         },
         index=pd.Index(ids, name="id"),
     )
diff --git a/components/embed_images/README.md b/components/embed_images/README.md
index eec02f577..23e746136 100644
--- a/components/embed_images/README.md
+++ b/components/embed_images/README.md
@@ -7,13 +7,11 @@ Component that generates CLIP embeddings from images
 
 **This component consumes:**
 
-- images
-    - data: binary
+- images_data: binary
 
 **This component produces:**
 
-- embeddings
-    - data: list<item: float>
+- embeddings_data: list<item: float>
 
 ### Arguments
 
diff --git a/components/embed_images/fondant_component.yaml b/components/embed_images/fondant_component.yaml
index a176b2f6b..86fdb53a4 100644
--- a/components/embed_images/fondant_component.yaml
+++ b/components/embed_images/fondant_component.yaml
@@ -2,21 +2,17 @@ name: Embed images
 description: Component that generates CLIP embeddings from images
 image: fndnt/embed_images:dev
 tags:
-   - Image processing
+  - Image processing
 
 consumes:
-  images:
-      fields:
-        data:
-          type: binary
+  images_data:
+    type: binary
 
 produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
+  embeddings_data:
+    type: array
+    items:
+      type: float32
 
 args:
   model_id:
diff --git a/components/embed_images/src/main.py b/components/embed_images/src/main.py
index 03c647dc0..a0270b1e8 100644
--- a/components/embed_images/src/main.py
+++ b/components/embed_images/src/main.py
@@ -90,7 +90,7 @@ def __init__(
         self.batch_size = batch_size
 
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
-        images = dataframe["images"]["data"]
+        images = dataframe["images_data"]
 
         results: t.List[pd.Series] = []
         for batch in np.split(
@@ -110,4 +110,4 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
                 ).T
                 results.append(embeddings)
 
-        return pd.concat(results).to_frame(name=("embeddings", "data"))
+        return pd.concat(results).to_frame(name=("embeddings_data"))
diff --git a/components/embed_text/README.md b/components/embed_text/README.md
index a30a9ec4f..c53a779b9 100644
--- a/components/embed_text/README.md
+++ b/components/embed_text/README.md
@@ -7,14 +7,12 @@ Component that generates embeddings of text passages.
 
 **This component consumes:**
 
-- text
-    - data: string
+- text_data: string
 
 **This component produces:**
 
-- text
-    - data: string
-    - embedding: list<item: float>
+- text_data: string
+- text_embedding: list<item: float>
 
 ### Arguments
 
diff --git a/components/embed_text/fondant_component.yaml b/components/embed_text/fondant_component.yaml
index 2e34c5c0a..a1a3ca816 100644
--- a/components/embed_text/fondant_component.yaml
+++ b/components/embed_text/fondant_component.yaml
@@ -5,21 +5,17 @@ tags:
   - Text processing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
+  text_data:
+    type: string
 
 produces:
-  text:
-    fields:
-      data:
-        type: string
-      embedding:
-        type: array
-        items:
-          type: float32
-          
+  text_data:
+    type: string
+  text_embedding:
+    type: array
+    items:
+      type: float32
+
 args:
   model_provider:
     description: |
diff --git a/components/embed_text/src/main.py b/components/embed_text/src/main.py
index c8c2acfde..3fdc08e47 100644
--- a/components/embed_text/src/main.py
+++ b/components/embed_text/src/main.py
@@ -65,7 +65,7 @@ def get_embeddings_vectors(self, texts):
         return self.embedding_model.embed_documents(texts.tolist())
 
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
-        dataframe[("text", "embedding")] = self.get_embeddings_vectors(
-            dataframe[("text", "data")],
+        dataframe["text_embedding"] = self.get_embeddings_vectors(
+            dataframe["text_data"],
         )
         return dataframe
diff --git a/components/embedding_based_laion_retrieval/README.md b/components/embedding_based_laion_retrieval/README.md
index 454253416..f19d55b03 100644
--- a/components/embedding_based_laion_retrieval/README.md
+++ b/components/embedding_based_laion_retrieval/README.md
@@ -9,13 +9,11 @@ used to find images similar to the embedded images / captions.
 
 **This component consumes:**
 
-- embeddings
-    - data: list<item: float>
+- embeddings_data: list<item: float>
 
 **This component produces:**
 
-- images
-    - url: string
+- images_url: string
 
 ### Arguments
 
diff --git a/components/embedding_based_laion_retrieval/fondant_component.yaml b/components/embedding_based_laion_retrieval/fondant_component.yaml
index d93e634a3..af147c158 100644
--- a/components/embedding_based_laion_retrieval/fondant_component.yaml
+++ b/components/embedding_based_laion_retrieval/fondant_component.yaml
@@ -7,19 +7,15 @@ tags:
   - Data retrieval
 
 consumes:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
+  embeddings_data:
+    type: array
+    items:
+      type: float32
 
 produces:
-  images:
-    fields:
-      url:
-        type: string
-  additionalSubsets: false
+  images_url:
+    type: string
+# additionalFields: false
 
 args:
   num_images:
diff --git a/components/embedding_based_laion_retrieval/src/main.py b/components/embedding_based_laion_retrieval/src/main.py
index b350e6142..0f7697dc3 100644
--- a/components/embedding_based_laion_retrieval/src/main.py
+++ b/components/embedding_based_laion_retrieval/src/main.py
@@ -58,18 +58,18 @@ async def async_query():
                             embedding_input=embedding.tolist(),
                         ),
                     )
-                    for embedding in dataframe["embeddings"]["data"]
+                    for embedding in dataframe["embeddings_data"]
                 ]
                 for response in await asyncio.gather(*futures):
                     results.extend(response)
 
         loop.run_until_complete(async_query())
 
         results_df = pd.DataFrame(results)[["id", "url"]]
         results_df = results_df.set_index("id")
 
         # Cast the index to string
         results_df.index = results_df.index.astype(str)
-        results_df.columns = [["images"], ["url"]]
+        results_df.columns = ["images_url"]
 
         return results_df
diff --git a/components/filter_image_resolution/README.md b/components/filter_image_resolution/README.md
index 1bc0c27f5..e7093e680 100644
--- a/components/filter_image_resolution/README.md
+++ b/components/filter_image_resolution/README.md
@@ -7,9 +7,8 @@ Component that filters images based on minimum size and max aspect ratio
 
 **This component consumes:**
 
-- images
-    - width: int32
-    - height: int32
+- images_width: int32
+- images_height: int32
 
 **This component produces no data.**
 
diff --git a/components/filter_image_resolution/fondant_component.yaml b/components/filter_image_resolution/fondant_component.yaml
index 0512d87f9..b6ff8cbe7 100644
--- a/components/filter_image_resolution/fondant_component.yaml
+++ b/components/filter_image_resolution/fondant_component.yaml
@@ -5,12 +5,10 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      width:
-        type: int32
-      height:
-        type: int32
+  images_width:
+    type: int32
+  images_height:
+    type: int32
 
 args:
   min_image_dim:
diff --git a/components/filter_image_resolution/src/main.py b/components/filter_image_resolution/src/main.py
index 8fbfdfa77..b169196ec 100644
--- a/components/filter_image_resolution/src/main.py
+++ b/components/filter_image_resolution/src/main.py
@@ -23,8 +23,8 @@ def __init__(self, *_, min_image_dim: int, max_aspect_ratio: float) -> None:
         self.max_aspect_ratio = max_aspect_ratio
 
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
-        width = dataframe["images"]["width"]
-        height = dataframe["images"]["height"]
+        width = dataframe["images_width"]
+        height = dataframe["images_height"]
         min_image_dim = np.minimum(width, height)
         max_image_dim = np.maximum(width, height)
         aspect_ratio = max_image_dim / min_image_dim
diff --git a/components/filter_text_length/README.md b/components/filter_text_length/README.md
index ed89dd128..4c5730180 100644
--- a/components/filter_text_length/README.md
+++ b/components/filter_text_length/README.md
@@ -7,8 +7,7 @@ A component that filters out text based on their length
 
 **This component consumes:**
 
-- text
-    - data: string
+- text_data: string
 
 **This component produces no data.**
 
diff --git a/components/filter_text_length/fondant_component.yaml b/components/filter_text_length/fondant_component.yaml
index fee0fb242..2451f5981 100644
--- a/components/filter_text_length/fondant_component.yaml
+++ b/components/filter_text_length/fondant_component.yaml
@@ -5,10 +5,8 @@ tags:
   - Text processing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
+  text_data:
+    type: string
 
 args:
   min_characters_length:
diff --git a/components/filter_text_length/src/main.py b/components/filter_text_length/src/main.py
index 3e2f472a4..e3a6b0d61 100644
--- a/components/filter_text_length/src/main.py
+++ b/components/filter_text_length/src/main.py
@@ -23,10 +23,10 @@ def __init__(self, *_, min_characters_length: int, min_words_length: int):
 
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         """Filter out text based on their length."""
-        caption_num_words = dataframe["text"]["data"].apply(
+        caption_num_words = dataframe["text_data"].apply(
             lambda x: len(fasttext.tokenize(x)),
         )
-        caption_num_chars = dataframe["text"]["data"].apply(len)
+        caption_num_chars = dataframe["text_data"].apply(len)
 
         mask = (caption_num_words >= self.min_words_length) & (
             caption_num_chars >= self.min_characters_length
diff --git a/components/filter_text_length/tests/text_length_filter_test.py b/components/filter_text_length/tests/text_length_filter_test.py
index eea98864e..55c927e79 100644
--- a/components/filter_text_length/tests/text_length_filter_test.py
+++ b/components/filter_text_length/tests/text_length_filter_test.py
@@ -24,6 +24,6 @@ def test_run_component_test():
     # Then: dataframe only contains one row
     assert len(dataframe) == 1
     assert (
-        dataframe.loc[2]["text"]["data"]
+        dataframe.loc[2]["text_data"]
         == "This a valid sentence which should be still there"
     )
diff --git a/components/image_cropping/README.md b/components/image_cropping/README.md
index 5d679c457..e59af3af6 100644
--- a/components/image_cropping/README.md
+++ b/components/image_cropping/README.md
@@ -22,15 +22,13 @@ right side is border-cropped image.
 
 **This component consumes:**
 
-- images
-    - data: binary
+- images_data: binary
 
 **This component produces:**
 
-- images
-    - data: binary
-    - width: int32
-    - height: int32
+- images_data: binary
+- images_width: int32
+- images_height: int32
 
 ### Arguments
 
diff --git a/components/image_cropping/fondant_component.yaml b/components/image_cropping/fondant_component.yaml
index 416bc2c1d..130b14324 100644
--- a/components/image_cropping/fondant_component.yaml
+++ b/components/image_cropping/fondant_component.yaml
@@ -20,20 +20,16 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  images:
-    fields:
-      data:
-        type: binary
-      width:
-        type: int32
-      height:
-        type: int32
+  images_data:
+    type: binary
+  images_width:
+    type: int32
+  images_height:
+    type: int32
 
 args:
   cropping_threshold:
diff --git a/components/image_cropping/src/main.py b/components/image_cropping/src/main.py
index c670fdeb8..6a62e309c 100644
--- a/components/image_cropping/src/main.py
+++ b/components/image_cropping/src/main.py
@@ -46,12 +46,12 @@ def __init__(
 
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         # crop images
-        dataframe["images"]["data"] = dataframe["images"]["data"].apply(
+        dataframe["images_data"] = dataframe["images_data"].apply(
             lambda image: remove_borders(image, self.cropping_threshold, self.padding),
         )
 
         # extract width and height
-        dataframe["images"][["width", "height"]] = dataframe["images"]["data"].apply(
+        dataframe[["images_width", "images_height"]] = dataframe["images_data"].apply(
             extract_dimensions,
             axis=1,
             result_type="expand",
diff --git a/components/image_resolution_extraction/README.md b/components/image_resolution_extraction/README.md
index a69a4df4e..77e11742d 100644
--- a/components/image_resolution_extraction/README.md
+++ b/components/image_resolution_extraction/README.md
@@ -7,15 +7,13 @@ Component that extracts image resolution data from the images
 
 **This component consumes:**
 
-- images
-    - data: binary
+- images_data: binary
 
 **This component produces:**
 
-- images
-    - data: binary
-    - width: int32
-    - height: int32
+- images_data: binary
+- images_width: int32
+- images_height: int32
 
 ### Arguments
 
diff --git a/components/image_resolution_extraction/fondant_component.yaml b/components/image_resolution_extraction/fondant_component.yaml
index 1ddbf4afb..f840da680 100644
--- a/components/image_resolution_extraction/fondant_component.yaml
+++ b/components/image_resolution_extraction/fondant_component.yaml
@@ -5,17 +5,13 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  images:
-    fields:
-      data:
-        type: binary
-      width:
-        type: int32
-      height:
-        type: int32
\ No newline at end of file
+  images_data:
+    type: binary
+  images_width:
+    type: int32
+  images_height:
+    type: int32
\ No newline at end of file
diff --git a/components/image_resolution_extraction/src/main.py b/components/image_resolution_extraction/src/main.py
index 823b7b70f..a8715d831 100644
--- a/components/image_resolution_extraction/src/main.py
+++ b/components/image_resolution_extraction/src/main.py
@@ -38,8 +38,9 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         """
         logger.info("Filtering dataset...")
 
-        dataframe[[("images", "width"), ("images", "height")]] = dataframe[
-            [("images", "data")]
-        ].apply(lambda x: extract_dimensions(x.images.data), axis=1)
+        dataframe[["images_width", "images_height"]] = dataframe[["images_data"]].apply(
+            lambda x: extract_dimensions(x.images_data),
+            axis=1,
+        )
 
         return dataframe
diff --git a/components/index_weaviate/README.md b/components/index_weaviate/README.md
index ce4729c52..efa6286a0 100644
--- a/components/index_weaviate/README.md
+++ b/components/index_weaviate/README.md
@@ -7,9 +7,8 @@ Component that takes embeddings of text snippets and indexes them into a weaviat
 
 **This component consumes:**
 
-- text
-    - data: string
-    - embedding: list<item: float>
+- text_data: string
+- text_embedding: list<item: float>
 
 **This component produces no data.**
 
diff --git a/components/index_weaviate/fondant_component.yaml b/components/index_weaviate/fondant_component.yaml
index d20d168fd..cb06ad683 100644
--- a/components/index_weaviate/fondant_component.yaml
+++ b/components/index_weaviate/fondant_component.yaml
@@ -5,14 +5,12 @@ tags:
   - Data writing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
-      embedding:
-        type: array
-        items:
-          type: float32
+  text_data:
+    type: string
+  text_embedding:
+    type: array
+    items:
+      type: float32
 
 args:
   weaviate_url:
diff --git a/components/language_filter/README.md b/components/language_filter/README.md
index c3afd6435..3aebe1e26 100644
--- a/components/language_filter/README.md
+++ b/components/language_filter/README.md
@@ -7,8 +7,7 @@ A component that filters text based on the provided language.
 
 **This component consumes:**
 
-- text
-    - data: string
+- text_data: string
 
 **This component produces no data.**
 
diff --git a/components/language_filter/fondant_component.yaml b/components/language_filter/fondant_component.yaml
index ab59a58be..3a98f27f7 100644
--- a/components/language_filter/fondant_component.yaml
+++ b/components/language_filter/fondant_component.yaml
@@ -5,10 +5,8 @@ tags:
   - Text processing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
+  text_data:
+    type: string
 
 args:
   language:
diff --git a/components/language_filter/src/main.py b/components/language_filter/src/main.py
index f306512e4..4c753d1b4 100644
--- a/components/language_filter/src/main.py
+++ b/components/language_filter/src/main.py
@@ -38,7 +38,7 @@ def predict_lang(self, text: str):
 
     def is_language(self, row):
         """Predict if text of a row is written in the defined language."""
-        return self.language in self.predict_lang(row["text"])
+        return self.language in self.predict_lang(row["text_data"])
 
 
 class LanguageFilterComponent(PandasTransformComponent):
diff --git a/components/load_from_files/README.md b/components/load_from_files/README.md
index 834f568e5..9a618f176 100644
--- a/components/load_from_files/README.md
+++ b/components/load_from_files/README.md
@@ -11,9 +11,8 @@ location. It supports the following formats: .zip, gzip, tar and tar.gz.
 
 **This component produces:**
 
-- file
-    - filename: string
-    - content: binary
+- file_filename: string
+- file_content: binary
 
 ### Arguments
 
diff --git a/components/load_from_files/fondant_component.yaml b/components/load_from_files/fondant_component.yaml
index 11416e5b5..2e0167b9d 100644
--- a/components/load_from_files/fondant_component.yaml
+++ b/components/load_from_files/fondant_component.yaml
@@ -7,13 +7,11 @@ tags:
   - Data loading
 
 produces:
-  file:
-    fields:
-      filename:
-        type: string
-      content:
-        type: binary
-      
+  file_filename:
+    type: string
+  file_content:
+    type: binary
+
 args:
   directory_uri:
     description: Local or remote path to the directory containing the files
diff --git a/components/load_from_hf_hub/README.md b/components/load_from_hf_hub/README.md
index 1faa0175a..e14e6f440 100644
--- a/components/load_from_hf_hub/README.md
+++ b/components/load_from_hf_hub/README.md
@@ -9,8 +9,7 @@ Component that loads a dataset from the hub
 
 **This component produces:**
 
-- dummy_variable
-    - data: binary
+- dummy_variable: binary
 
 ### Arguments
 
diff --git a/components/load_from_hf_hub/fondant_component.yaml b/components/load_from_hf_hub/fondant_component.yaml
index d6a625971..7e72f2b22 100644
--- a/components/load_from_hf_hub/fondant_component.yaml
+++ b/components/load_from_hf_hub/fondant_component.yaml
@@ -5,10 +5,8 @@ tags:
   - Data loading
 
 produces:
-  dummy_variable:  #TODO: fill in here
-    fields:
-      data:
-        type: binary
+  dummy_variable: #TODO: fill in here
+    type: binary
 
 args:
   dataset_name:
@@ -19,10 +17,10 @@ args:
     type: dict
     default: {}
   image_column_names:
-    description: Optional argument, a list containing the original image column names in case the 
+    description: Optional argument, a list containing the original image column names in case the
       dataset on the hub contains them. Used to format the image from HF hub format to a byte string.
     type: list
-    default: []
+    default: [ ]
   n_rows_to_load:
     description: Optional argument that defines the number of rows to load. Useful for testing pipeline runs on a small scale
     type: int
diff --git a/components/load_from_hf_hub/src/main.py b/components/load_from_hf_hub/src/main.py
index b978a96af..ccb2dd2ab 100644
--- a/components/load_from_hf_hub/src/main.py
+++ b/components/load_from_hf_hub/src/main.py
@@ -54,16 +54,12 @@ def get_columns_to_keep(self) -> t.List[str]:
         else:
             invert_column_name_mapping = {}
 
-        for subset_name, subset in self.spec.produces.items():
-            for field_name, field in subset.fields.items():
-                column_name = f"{subset_name}_{field_name}"
-                if (
-                    invert_column_name_mapping
-                    and column_name in invert_column_name_mapping
-                ):
-                    columns.append(invert_column_name_mapping[column_name])
-                else:
-                    columns.append(column_name)
+        for field_name, field in self.spec.produces.items():
+            column_name = field_name
+            if invert_column_name_mapping and column_name in invert_column_name_mapping:
+                columns.append(invert_column_name_mapping[column_name])
+            else:
+                columns.append(column_name)
 
         if self.index_column is not None:
             columns.append(self.index_column)
@@ -99,11 +95,10 @@ def _set_unique_index(dataframe: pd.DataFrame, partition_info=None):
 
             def _get_meta_df() -> pd.DataFrame:
                 meta_dict = {"id": pd.Series(dtype="object")}
-                for subset_name, subset in self.spec.produces.items():
-                    for field_name, field in subset.fields.items():
-                        meta_dict[f"{subset_name}_{field_name}"] = pd.Series(
-                            dtype=pd.ArrowDtype(field.type.value),
-                        )
+                for field_name, field in self.spec.produces.items():
+                    meta_dict[field_name] = pd.Series(
+                        dtype=pd.ArrowDtype(field.type.value),
+                    )
                 return pd.DataFrame(meta_dict).set_index("id")
 
             meta = _get_meta_df()
diff --git a/components/load_from_parquet/README.md b/components/load_from_parquet/README.md
index c83f7e9e8..d6bda66c3 100644
--- a/components/load_from_parquet/README.md
+++ b/components/load_from_parquet/README.md
@@ -9,8 +9,7 @@ Component that loads a dataset from a parquet uri
 
 **This component produces:**
 
-- dummy_variable
-    - data: binary
+- dummy_variable: binary
 
 ### Arguments
 
diff --git a/components/load_from_parquet/fondant_component.yaml b/components/load_from_parquet/fondant_component.yaml
index 5cc5796fa..894069c59 100644
--- a/components/load_from_parquet/fondant_component.yaml
+++ b/components/load_from_parquet/fondant_component.yaml
@@ -5,10 +5,8 @@ tags:
   - Data loading
 
 produces:
-  dummy_variable:  #TODO: fill in here
-    fields:
-      data:
-        type: binary
+  dummy_variable:
+    type: binary
 
 args:
   dataset_uri:
diff --git a/components/load_from_parquet/src/main.py b/components/load_from_parquet/src/main.py
index ddd338552..117ae10ce 100644
--- a/components/load_from_parquet/src/main.py
+++ b/components/load_from_parquet/src/main.py
@@ -50,16 +50,12 @@ def get_columns_to_keep(self) -> t.List[str]:
         else:
             invert_column_name_mapping = {}
 
-        for subset_name, subset in self.spec.produces.items():
-            for field_name, field in subset.fields.items():
-                column_name = f"{subset_name}_{field_name}"
-                if (
-                    invert_column_name_mapping
-                    and column_name in invert_column_name_mapping
-                ):
-                    columns.append(invert_column_name_mapping[column_name])
-                else:
-                    columns.append(column_name)
+        for field_name, field in self.spec.produces.items():
+            column_name = field_name
+            if invert_column_name_mapping and column_name in invert_column_name_mapping:
+                columns.append(invert_column_name_mapping[column_name])
+            else:
+                columns.append(column_name)
 
         if self.index_column is not None:
             columns.append(self.index_column)
@@ -85,11 +81,10 @@ def _set_unique_index(dataframe: pd.DataFrame, partition_info=None):
 
             def _get_meta_df() -> pd.DataFrame:
                 meta_dict = {"id": pd.Series(dtype="object")}
-                for subset_name, subset in self.spec.produces.items():
-                    for field_name, field in subset.fields.items():
-                        meta_dict[f"{subset_name}_{field_name}"] = pd.Series(
-                            dtype=pd.ArrowDtype(field.type.value),
-                        )
+                for field_name, field in self.spec.produces.items():
+                    meta_dict[field_name] = pd.Series(
+                        dtype=pd.ArrowDtype(field.type.value),
+                    )
                 return pd.DataFrame(meta_dict).set_index("id")
 
             meta = _get_meta_df()
diff --git a/components/minhash_generator/README.md b/components/minhash_generator/README.md
index 422fdc7af..5fc4cb86e 100644
--- a/components/minhash_generator/README.md
+++ b/components/minhash_generator/README.md
@@ -7,13 +7,11 @@ A component that generates minhashes of text.
 
 **This component consumes:**
 
-- text
-    - data: string
+- text_data: string
 
 **This component produces:**
 
-- text
-    - minhash: list<item: uint64>
+- text_minhash: list<item: uint64>
 
 ### Arguments
 
diff --git a/components/minhash_generator/fondant_component.yaml b/components/minhash_generator/fondant_component.yaml
index 6528112ef..1747982f8 100644
--- a/components/minhash_generator/fondant_component.yaml
+++ b/components/minhash_generator/fondant_component.yaml
@@ -5,18 +5,14 @@ tags:
   - Text processing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
+  text_data:
+    type: string
 
 produces:
-  text:
-    fields:
-      minhash:
-        type: array
-        items:
-            type: uint64
+  text_minhash:
+    type: array
+    items:
+      type: uint64
 args:
   shingle_ngram_size:
     description: Define size of ngram used for the shingle generation
diff --git a/components/minhash_generator/src/main.py b/components/minhash_generator/src/main.py
index c8034334b..f61e34fcb 100644
--- a/components/minhash_generator/src/main.py
+++ b/components/minhash_generator/src/main.py
@@ -51,10 +51,10 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         Returns:
             Pandas dataframe
         """
-        dataframe[("text", "shingles")] = dataframe[("text", "data")].apply(
+        dataframe["text_shingles"] = dataframe["text_data"].apply(
             create_shingles,
         )
-        dataframe[("text", "minhash")] = dataframe[("text", "shingles")].apply(
+        dataframe["text_minhash"] = dataframe["text_shingles"].apply(
             compute_minhash,
         )
 
diff --git a/components/normalize_text/README.md b/components/normalize_text/README.md
index edc955a79..3609ba0de 100644
--- a/components/normalize_text/README.md
+++ b/components/normalize_text/README.md
@@ -19,8 +19,7 @@ the training of large language models.
 
 **This component consumes:**
 
-- text
-    - data: string
+- text_data: string
 
 **This component produces no data.**
 
diff --git a/components/normalize_text/fondant_component.yaml b/components/normalize_text/fondant_component.yaml
index d6551f578..fd9cfc4cb 100644
--- a/components/normalize_text/fondant_component.yaml
+++ b/components/normalize_text/fondant_component.yaml
@@ -17,10 +17,8 @@ tags:
   - Text processing
 
 consumes:
-  text:
-    fields:
-      data:
-        type: string
+  text_data:
+    type: string
 
 args:
   remove_additional_whitespaces:
diff --git a/components/normalize_text/src/main.py b/components/normalize_text/src/main.py
index 47220fba4..a98b7b36b 100644
--- a/components/normalize_text/src/main.py
+++ b/components/normalize_text/src/main.py
@@ -89,31 +89,31 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
             Pandas dataframe
         """
         if self.normalize_lines:
-            dataframe[("text", "data")] = dataframe[("text", "data")].apply(
+            dataframe["text_data"] = dataframe["text_data"].apply(
                 normalize_lines,
             )
 
         if self.do_lowercase:
-            dataframe[("text", "data")] = dataframe[("text", "data")].apply(
+            dataframe["text_data"] = dataframe["text_data"].apply(
                 lambda x: x.lower(),
             )
 
         if self.apply_nfc:
-            dataframe[("text", "data")] = dataframe[("text", "data")].apply(
+            dataframe["text_data"] = dataframe["text_data"].apply(
                 self._do_nfc_normalization,
             )
 
         if self.remove_punctuation:
-            dataframe[("text", "data")] = dataframe[("text", "data")].apply(
+            dataframe["text_data"] = dataframe["text_data"].apply(
                 _remove_punctuation,
             )
 
         if self.remove_additional_whitespaces:
-            dataframe[("text", "data")] = dataframe[("text", "data")].apply(
+            dataframe["text_data"] = dataframe["text_data"].apply(
                 _remove_additional_whitespaces,
             )
 
         # remove all empty rows
-        dataframe = dataframe[dataframe[("text", "data")].astype(bool)]
+        dataframe = dataframe[dataframe["text_data"].astype(bool)]
 
         return dataframe
diff --git a/components/prompt_based_laion_retrieval/README.md b/components/prompt_based_laion_retrieval/README.md
index af43a9826..8d7ffcf70 100644
--- a/components/prompt_based_laion_retrieval/README.md
+++ b/components/prompt_based_laion_retrieval/README.md
@@ -12,13 +12,11 @@ This component doesn’t return the actual images, only URLs.
 
 **This component consumes:**
 
-- prompts
-    - text: string
+- prompts_text: string
 
 **This component produces:**
 
-- images
-    - url: string
+- images_url: string
 
 ### Arguments
 
diff --git a/components/prompt_based_laion_retrieval/fondant_component.yaml b/components/prompt_based_laion_retrieval/fondant_component.yaml
index fdd7589dc..02ea08349 100644
--- a/components/prompt_based_laion_retrieval/fondant_component.yaml
+++ b/components/prompt_based_laion_retrieval/fondant_component.yaml
@@ -10,17 +10,13 @@ tags:
   - Data retrieval
 
 consumes:
-  prompts:
-    fields:
-      text:
-        type: string
+  prompts_text:
+    type: string
 
 produces:
-  images:
-    fields:
-      url:
-        type: string
-  additionalSubsets: false
+  images_url:
+    type: string
+#  additionalFields: false
 
 args:
   num_images:
diff --git a/components/prompt_based_laion_retrieval/src/main.py b/components/prompt_based_laion_retrieval/src/main.py
index c9459060f..2168f5ef0 100644
--- a/components/prompt_based_laion_retrieval/src/main.py
+++ b/components/prompt_based_laion_retrieval/src/main.py
@@ -56,18 +56,18 @@ async def async_query():
                         self.client.query,
                         prompt,
                     )
-                    for prompt in dataframe["prompts"]["text"]
+                    for prompt in dataframe["prompts_text"]
                 ]
                 for response in await asyncio.gather(*futures):
                     results.extend(response)
 
         loop.run_until_complete(async_query())
 
-        results_df = pd.DataFrame(results)[["id", "url"]]
+        results_df = pd.DataFrame(results)[["id", "url"]]
         results_df = results_df.set_index("id")
 
         # Cast the index to string
         results_df.index = results_df.index.astype(str)
-        results_df.columns = [["images"], ["url"]]
+        results_df.columns = ["images_url"]
 
         return results_df
diff --git a/components/resize_images/README.md b/components/resize_images/README.md
index 593b2ca76..89561e7a5 100644
--- a/components/resize_images/README.md
+++ b/components/resize_images/README.md
@@ -7,13 +7,11 @@ Component that resizes images based on given width and height
 
 **This component consumes:**
 
-- images
-    - data: binary
+- images_data: binary
 
 **This component produces:**
 
-- images
-    - data: binary
+- images_data: binary
 
 ### Arguments
 
diff --git a/components/resize_images/fondant_component.yaml b/components/resize_images/fondant_component.yaml
index 6ab866d12..6112815c4 100644
--- a/components/resize_images/fondant_component.yaml
+++ b/components/resize_images/fondant_component.yaml
@@ -5,16 +5,12 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 args:
   resize_width:
diff --git a/components/resize_images/src/main.py b/components/resize_images/src/main.py
index 434dd29db..d5d4207bb 100644
--- a/components/resize_images/src/main.py
+++ b/components/resize_images/src/main.py
@@ -29,6 +29,6 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
             axis=1,
         )
 
-        dataframe[("images", "data")] = result
+        dataframe["images_data"] = result
 
         return dataframe
diff --git a/components/segment_images/README.md b/components/segment_images/README.md
index 9f475d516..418eacb13 100644
--- a/components/segment_images/README.md
+++ b/components/segment_images/README.md
@@ -7,13 +7,11 @@ Component that creates segmentation masks for images using a model from the Hugg
 
 **This component consumes:**
 
-- images
-    - data: binary
+- images_data: binary
 
 **This component produces:**
 
-- segmentations
-    - data: binary
+- segmentations_data: binary
 
 ### Arguments
 
diff --git a/components/segment_images/fondant_component.yaml b/components/segment_images/fondant_component.yaml
index fca45e541..34fbd9fcd 100644
--- a/components/segment_images/fondant_component.yaml
+++ b/components/segment_images/fondant_component.yaml
@@ -5,16 +5,12 @@ tags:
   - Image processing
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  segmentations:
-    fields:
-      data:
-        type: binary
+  segmentations_data:
+    type: binary
 
 args:
   model_id:
diff --git a/components/segment_images/src/main.py b/components/segment_images/src/main.py
index 0f8f46faa..4e06c5d89 100644
--- a/components/segment_images/src/main.py
+++ b/components/segment_images/src/main.py
@@ -150,4 +150,4 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
 
                 results.append(segmentations)
 
-        return pd.concat(results).to_frame(name=("segmentations", "data"))
+        return pd.concat(results).to_frame(name="segmentations_data")
diff --git a/components/write_to_hf_hub/README.md b/components/write_to_hf_hub/README.md
index 54978470a..ec80bf334 100644
--- a/components/write_to_hf_hub/README.md
+++ b/components/write_to_hf_hub/README.md
@@ -7,8 +7,7 @@ Component that writes a dataset to the hub
 
 **This component consumes:**
 
-- dummy_variable
-    - data: binary
+- dummy_variable: binary
 
 **This component produces no data.**
 
diff --git a/components/write_to_hf_hub/fondant_component.yaml b/components/write_to_hf_hub/fondant_component.yaml
index 363f2507c..b4391fbbc 100644
--- a/components/write_to_hf_hub/fondant_component.yaml
+++ b/components/write_to_hf_hub/fondant_component.yaml
@@ -5,10 +5,8 @@ tags:
   - Data writing
 
 consumes:
-  dummy_variable:  #TODO: fill in here
-    fields:
-      data:
-        type: binary
+  dummy_variable:
+    type: binary
 
 args:
   hf_token:
@@ -23,7 +21,7 @@ args:
   image_column_names:
     description: A list containing the image column names. Used to format to image to HF hub format
     type: list
-    default: []
+    default: [ ]
   column_name_mapping:
     description: Mapping of the consumed fondant column names to the written hub column names
     type: dict
diff --git a/components/write_to_hf_hub/src/main.py b/components/write_to_hf_hub/src/main.py
index 0ed01b961..6d464f0f2 100644
--- a/components/write_to_hf_hub/src/main.py
+++ b/components/write_to_hf_hub/src/main.py
@@ -74,16 +74,15 @@ def write(
         # Get columns to write and schema
         write_columns = []
         schema_dict = {}
-        for subset_name, subset in self.spec.consumes.items():
-            for field in subset.fields.values():
-                column_name = f"{subset_name}_{field.name}"
-                write_columns.append(column_name)
-                if self.image_column_names and column_name in self.image_column_names:
-                    schema_dict[column_name] = datasets.Image()
-                else:
-                    schema_dict[column_name] = generate_from_arrow_type(
-                        field.type.value,
-                    )
+        for field_name, field in self.spec.consumes.items():
+            column_name = field.name
+            write_columns.append(column_name)
+            if self.image_column_names and column_name in self.image_column_names:
+                schema_dict[column_name] = datasets.Image()
+            else:
+                schema_dict[column_name] = generate_from_arrow_type(
+                    field.type.value,
+                )
 
         schema = datasets.Features(schema_dict).arrow_schema
         dataframe = dataframe[write_columns]
diff --git a/scripts/component_readme/readme_template.md b/scripts/component_readme/readme_template.md
index 1266b56d3..54ad2e417 100644
--- a/scripts/component_readme/readme_template.md
+++ b/scripts/component_readme/readme_template.md
@@ -8,11 +8,8 @@
 {% if consumes %}
 **This component consumes:**
 
-{% for subset_name, subset in consumes.items() %}
-- {{ subset_name }}
-{% for field in subset.fields.values() %}
-    - {{ field.name }}: {{ field.type.value }}
-{% endfor %}
+{% for field_name, field in consumes.items() %}
+- {{ field.name }}: {{ field.type.value }}
 {% endfor %}
 {% else %}
 **This component consumes no data.**
@@ -21,11 +18,8 @@
 {% if produces %}
 **This component produces:**
 
-{% for subset_name, subset in produces.items() %}
-- {{ subset_name }}
-{% for field in subset.fields.values() %}
-    - {{ field.name }}: {{ field.type.value }}
-{% endfor %}
+{% for field_name, field in produces.items() %}
+- {{ field.name }}: {{ field.type.value }}
 {% endfor %}
 {% else %}
 **This component produces no data.**
diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py
index 013ce2b71..58c8ab045 100644
--- a/src/fondant/core/manifest.py
+++ b/src/fondant/core/manifest.py
@@ -188,7 +188,7 @@ def fields(self) -> t.Mapping[str, Field]:
             {
                 name: Field(
                     name=name,
-                    type=Type(field["type"]),
+                    type=Type.from_json(field),
                     location=field["location"],
                 )
                 for name, field in self._specification["fields"].items()
@@ -222,8 +222,8 @@ def _add_or_update_index(self, field: Field, overwrite: bool = True):
 
         if field.name != "index":
             msg = (
-                f"The field name is {field.name}. If you try to update the index, set the field"
-                f"name to `index`."
+                f"The field name is {field.name}. If you try to update the index, "  # nosec B608
+                f"set the field name to `index`."
             )
             raise ValueError(msg)
 
@@ -238,7 +238,7 @@ def remove_field(self, name: str) -> None:
 
         del self._specification["fields"][name]
 
-    def evolve(  # noqa : PLR0912 (too many branches)
+    def evolve(  # noqa: PLR0912 (too many branches)
         self,
         component_spec: ComponentSpec,
         *,
diff --git a/src/fondant/core/schema.py b/src/fondant/core/schema.py
index dc940b5f7..2599b5de1 100644
--- a/src/fondant/core/schema.py
+++ b/src/fondant/core/schema.py
@@ -5,7 +5,6 @@
 import os
 import re
 import typing as t
-from dataclasses import dataclass
 from enum import Enum
 
 import pyarrow as pa
@@ -168,7 +167,7 @@ class Field:
     def __init__(
         self,
         name: str,
-        type: Type = None,
+        type: Type = Type("null"),
         location: str = "",
     ) -> None:
         self._name = name
diff --git a/src/fondant/pipeline/pipeline.py b/src/fondant/pipeline/pipeline.py
index 36f81b7db..05be61c17 100644
--- a/src/fondant/pipeline/pipeline.py
+++ b/src/fondant/pipeline/pipeline.py
@@ -443,13 +443,13 @@ def _validate_pipeline_definition(self, run_id: str):
             if not load_component:
                 # Check subset exists
                 for (
-                    component_subset_name,
-                    component_subset,
+                    component_field_name,
+                    component_field,
                 ) in component_spec.consumes.items():
-                    if component_subset_name not in manifest.subsets:
+                    if component_field_name not in manifest.fields:
                         msg = (
-                            f"Component '{component_spec.name}' is trying to invoke the subset "
-                            f"'{component_subset_name}', which has not been defined or created "
+                            f"Component '{component_spec.name}' is trying to invoke the field "
+                            f"'{component_field_name}', which has not been defined or created "
                             f"in the previous components."
                         )
                         raise InvalidPipelineDefinition(
@@ -457,36 +457,22 @@ def _validate_pipeline_definition(self, run_id: str):
                         )
 
                     # Get the corresponding manifest fields
-                    manifest_fields = manifest.subsets[component_subset_name].fields
-
-                    # Check fields
-                    for field_name, subset_field in component_subset.fields.items():
-                        # Check if invoked field exists
-                        if field_name not in manifest_fields:
-                            msg = (
-                                f"The invoked subset '{component_subset_name}' of the "
-                                f"'{component_spec.name}' component does not match the "
-                                f"previously created subset definition.\n The component is "
-                                f"trying to invoke the field '{field_name}' which has not been "
-                                f"previously defined. Current available fields are "
-                                f"{manifest_fields}\n"
-                            )
-                            raise InvalidPipelineDefinition(
-                                msg,
-                            )
-                        # Check if the invoked field schema matches the current schema
-                        if subset_field != manifest_fields[field_name]:
-                            msg = (
-                                f"The invoked subset '{component_subset_name}' of the "
-                                f"'{component_spec.name}' component does not match  the "
-                                f"previously created subset definition.\n The '{field_name}' "
-                                f"field is currently defined with the following schema:\n"
-                                f"{manifest_fields[field_name]}\nThe current component to "
-                                f"trying to invoke it with this schema:\n{subset_field}"
-                            )
-                            raise InvalidPipelineDefinition(
-                                msg,
-                            )
+                    manifest_field = manifest.fields[component_field_name]
+
+                    # Check if the invoked field schema matches the current schema
+                    if component_field.type != manifest_field.type:
+                        msg = (
+                            f"The invoked field '{component_field_name}' of the "
+                            f"'{component_spec.name}' component does not match  the "
+                            f"previously created field type.\n The '{manifest_field.name}' "
+                            f"field is currently defined with the following type:\n"
+                            f"{manifest_field.type}\nThe current component to "
+                            f"trying to invoke it with this type:\n{component_field.type}"
+                        )
+                        raise InvalidPipelineDefinition(
+                            msg,
+                        )
+
             manifest = manifest.evolve(component_spec, run_id=run_id)
             load_component = False
 
diff --git a/tests/component/test_data_io.py b/tests/component/test_data_io.py
index 30a4b7c10..d9dad121f 100644
--- a/tests/component/test_data_io.py
+++ b/tests/component/test_data_io.py
@@ -9,9 +9,7 @@
 from fondant.core.manifest import Manifest
 
 manifest_path = Path(__file__).parent / "examples/data/manifest.json"
-component_spec_path = (
-    Path(__file__).parent / "examples/data/components/1.yaml"
-)
+component_spec_path = Path(__file__).parent / "examples/data/components/1.yaml"
 
 NUMBER_OF_TEST_ROWS = 151
 
diff --git a/tests/examples/example_data/components/1.yaml b/tests/examples/example_data/components/1.yaml
deleted file mode 100644
index 0c245a512..000000000
--- a/tests/examples/example_data/components/1.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Test component 1
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  properties:
-    fields:
-      Name:
-        type: "string"
-      HP:
-        type: "int32"
-  types:
-    fields:
-      Type 1:
-        type: "string"
-      Type 2:
-        type: "string"
-
-produces:
-  properties:
-    fields:
-      Name:
-        type: "string"
-      HP:
-        type: "int32"
-  types:
-    fields:
-      Type 1:
-        type: "string"
-      Type 2:
-        type: "string"
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
\ No newline at end of file
diff --git a/tests/examples/example_data/manifest.json b/tests/examples/example_data/manifest.json
deleted file mode 100644
index 8fe4ef16b..000000000
--- a/tests/examples/example_data/manifest.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
-    "metadata": {
-      "pipeline_name": "test_pipeline",
-      "base_path": "tests/example_data/subsets_input",
-      "run_id": "test_pipeline_12345",
-      "component_id": "67890"
-    },
-    "index": {
-      "location": "/index"
-    },
-    "subsets": {
-      "properties": {
-        "location": "/properties",
-        "fields": {
-            "Name": {
-                "type": "string"
-            },
-            "HP": {
-                "type": "int32"
-            }
-        }
-      },
-      "types": {
-        "location": "/types",
-        "fields": {
-            "Type 1": {
-                "type": "string"
-            },
-            "Type 2": {
-                "type": "string"
-            }
-        }
-      }
-    }
-  }
\ No newline at end of file
diff --git a/tests/examples/example_data/raw/split.py b/tests/examples/example_data/raw/split.py
deleted file mode 100644
index ade466125..000000000
--- a/tests/examples/example_data/raw/split.py
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-This is a small script to split the raw data into different subsets to be used while testing.
-
-The data is the 151 first pokemon and the following fields are available:
-
-'id', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
-'Sp. Atk', 'Sp. Def', 'Speed', 'source', 'Legendary'
-
-
-"""
-from pathlib import Path
-
-import dask.dataframe as dd
-
-data_path = Path(__file__).parent
-output_path = Path(__file__).parent.parent
-
-
-def split_into_subsets():
-    # read in complete dataset
-    master_df = dd.read_parquet(path=data_path / "testset.parquet")
-    master_df = master_df.set_index("id", sorted=True)
-    master_df = master_df.repartition(divisions=[0, 50, 100, 151], force=True)
-
-    # create properties subset
-    properties_df = master_df[["Name", "HP"]]
-    properties_df.to_parquet(output_path / "component_1")
-
-    # create types subset
-    types_df = master_df[["Type 1", "Type 2"]]
-    types_df.to_parquet(output_path / "component_2")
-
-
-if __name__ == "__main__":
-    split_into_subsets()
diff --git a/tests/examples/example_data/raw/testset.parquet b/tests/examples/example_data/raw/testset.parquet
deleted file mode 100644
index e7b9c625f0c104d9fb7c08137912df65d1915cd9..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 15048
zcmdU$3v?URna5{r$&qX+krT<tK_RlRWrxIxE!)8k1}s^A$gv$;emkVeNE%CHk2H!N
zww3U30!c$4BsdQsh1O&#rO>i0r726d9GWh)1lpxMx;-tW6xyEMF70NarCpZo?*ALf
zugC^^8gfnrKWXOPJ9F>-{`dRty)#aS-Cb@_7|ty>ysU0DJepx(mNU$gY6Z)vStdih
zM8RZgRg3~CK^9m7R6q@~K@QLWEm#Vc0UgK%%fSku2L_M_E&)c64_1OpfeEYvmx0y5
z3<`h+Tn-9B5wL<{Py*I~E5KT?4wQoRU;`)v<-i6iKqaUG8^M*J8q|PI;3`lHt_IhD
zYe5~b0|%%FPS5}vK@(^OF3<utgI3T6+QAmk0Xl&jbb)Tr1A0Lp=m!H}5Nrk4fo<S3
zU_00WIN$+Z-~&7u0)7wx0vHA(Km<V$0ul&=ogf0DAO_-K6pVp!kN~^DZg4%g0o({~
z0ux{l*b8n3w}5?MKlm&-0B!{b!ENAna0uK14ud1$PB00Mf@9#gR#kg~Nq+81{Tdj<
znGD0(jCC~fbcQTLmdU7^&NQmnRIxm!WPwy<-p`(!%Ea%Z8LX<9VKXun%B&?Sb#{(M
zyL6c@clipvA@34n{>n>Dt1eq@F0fo)SY$0OS#!nOb*1Y!l$F~mDyueLSzWW~s@kis
zxwg*ksCPCrHZ{9iHn+C5Z|UfCcXjvl_Vo`8ZoO{XXSVO)JYF9^<PQkLBVsTlg?C1x
zvH0lNcw*P?>u<R6rind!Z@y*U{?8t`_26x{AG+i4kvk`k9y@-5u|B6Y85H?dIwmXM
zm?8KK8F`Op<h`1g_q5jfq1vkYOS+uD4$jILG8la6@WU`Te`BTLLBovm{=K$UjdVv`
z^l(uw9$A7D*Jy&xF{yCW%ZK8urYswW^#Lvt<U&3^f2ocac`3x!u_m3ci;MW!DpS|a
zxDbhnJe#lU<03IBS8H~NTr?&LQJ%G$>bV#n2@5<MF}CqBE-d-jp=L)shSZ@5&u8KO
zeLU|ImD&2u+(<mcu{9<K5+Z^qvSxjU;PdkdR#0YZq*zR1JG5FhNAKohF)pU8G2%e*
z;wWP53iDh<8e^SIqcYp9#BCc!xKNZ;>+BJaG{RnE><~scZy?SV<8pxxM$BF4RWuOc
z#|&Bx+8kiNRoE{0qzD&!>z)br%gUuH0Szi<HM*T~o)6iI8D{UQVn)i<XhwvPpVgS^
zMSh4AA_;?r@vO<#_r}GrFs|+~hR|1$kFq_5ErQ=KCc^QdA$BR#r(CLu(cw$$2IC$s
zM%{0cL~XWyi_a&Z0jqRPq9g^`E#_W9<ilb-$g_F64lcq)+1*ODg7+4+8KI37;$rM2
zY))lRiUmkHr{EX)n9^c&#{IE~AhDvRjhDvMW@85*i3(hZU90biCVX*k(RC_R$>u26
zYPiq{ul#pYM??tnT$pE%n44fjQHTY2Hf*#<yj(~K6@AOtA&J5m=VuF*IhueF5e?dP
z5iS-`pkBKt!m$x(=i@w)2G(R3xe;y`7gR?z-hdQl3z#-Gn;B6KusJF&7%s3EI(Six
zh9gou#x5;{_F*C9K@z(ZUG~N>COo`y)YQ#~Vw_i!l$WZtnn;`%*{%8}Uh>6#%4bYX
z5pEPCF;Z}y23_Hm`!pR=DA9SZJ}mf1#{bZFiPBhDQctoPl_w&xZ<yLRe~1qXG36Ge
zRu#h-K3v$yMTYp8H&E<jw`rWXb<u<#KQLZFj-rcKA2vGqF~0<z|Bq=Z2$&)iLE7RY
z%D6s&2K#vaedsSGq8Q7{H9CxWF~NRc)6B&~xdt5{@^cY(t7c0`7-v`MoiOY!zG0K5
zIS~#i_vki9IFCnpm3}kEk;w1cX6lv1m=y7-ImsyUu!H0)`mL#9ocH^^@fha7cuZ36
z)zrs5(waXR+i0wFBW$g?S%?LtkjV1|tXiw`2Dp8<n_C6%h!BgT;dkpcOG86~I!6OT
z@6zTN-GM|viiFrVbvW`#4WHA)yF*c4`Ixbr3;B7GW3MxB#t|1C32@5w`erE-<vF%m
z)5UW>7ia&Tg6Ea>Dx7goE6utX7fHm~o7rqdP*`(XA6aj16M_w*zRR1yAS!Cmv?M~~
ztXPz*<2><QyI6+~LIk;@3|)s1io&gR-3UDre-{Y^2aSXa{>jvoz<BpbQFf!Ql?z9>
zeb4Hh0=!>Y5K!@>yz(Y}F9Jz~SN~4a0;_3rRE;8+3pbG>3BS5Pw*}EE@#RwbjmFfs
z={BS8*uF>9*@|$K{YSH%hjBS?EYa}3u`47+L~fitskcWEO5(0%`av#?x(k-;+i>p)
zugozzG1!HWQ1FTwM;H)rcnBiIc=pw@HhwHNCdnfMVI+N;+Q^hBWmh*U$W<_c{x;^K
zH;PRLO=>Puv~xi|IUC)6S>EeL>)Ar<-esoqySCt*b27tlB%{@EnY>;+#MV({hWkdN
z6g(P*2+!tr_t6|dCUnI86cO7c%r_#N?d-rLB86x=YLI<G@UnS&gs6DP$8OR!Qs8A*
zH#Z9Y0QB)w7(|Hk22jkSaZBD2wnWzykfJfxQ|9u@b4^k}==3mac#NI1oS@Fg?J#Dd
z6}VH+x$e}8K~c=;axJ!sih8Hh*)ufO)6;Vx*V)6ARYaQ#C-Vy|51+h;F+cvzT+5KN
z&|P%q#Ye92WQFf?^^7SBEtXTS<kedog?UeW^P8{c{qE6h>tyMyWwW{yVcuMxwCz_E
zc@HV9C)ZkkQzF}TPIDGC?MtlTdu*#APd06!kSv!p7@Mtc6caQ-WmhMyMOIImOAY(M
z`YLw0Y1(XTtv+QnbGR$U^CI;d_9BJIigdBu7MN`>YdA2^Y!9*1c2mj~psf8|-6i<R
zDgIMM-I?XAk;&h4)1TkU%*+}uJ9i(WSi$I6CO5ahwt{7x3~p}mj9!v^fH4o<vyt2L
z@WZ*eSk77BC_mpywxpGw$;^8?(|VxHI<?-kF|GCm4K<}UoKv(Kj<K?(UQ(#8VHqV$
z3r&SKL%B?WCBF_^4jq>HI&4LB%y-nq3g-J*i{7R+Gyj8xZQ$oP|6i?<xev!#IDQJy
z{^K;DeZa2(?Jv%N6@b>+{|#~h?G4@rv<F!Vz6SoFy_@+i?)4iS)1IarAcV_%nJ4l4
zdqC^_wWVgPv31O!aZLLXS~Ju7n)VNw+G^%Mk#`cuv_EM8KLoTdcoF;*&>n>LA8!Jc
zb_eril&5{h0H8h6_eyoxSCz^8Eid}(ME(O>U2Q+D8u583^$Ydy%`iMLe+@pJX;ghD
zS(>3WczP-_?_kdzN_o_h;)#hJI<r1wf2L(;U7<dw@jL%D;VfLy@@(YWzRJ5_-2ck+
z<JU~RR9LW5*?hwNOzh-T^Zmyje%$l*>q^|_uEec7{^9KAjrBitR{iv=`yTn#aLMG9
zq3)}(dj~_`edG_;r#k|FSi!D2akymsWdDs<o31FFyx)1IpxnJerMUIfS4wWv9%}Ud
z>OiZ7wLWy!?1Ed_En&%)9Aj^2@=mL)w^Uo-wPDen-kU}GxbWap8h(cW*qt1E8H!Tc
zFxS$CnKpkAK(UEZ+9`#G56MW<(w4LY(DstHnqA;BK-<#G0c|H60c{P}1KO5$0@@Z@
zfeS<dZMm`mW-NJoE8v&5m>&Q=psiaOm;!6SR&2k9Q(F2`zmWcY*6`t6$2Y1zNM#yT
zv`L^3nI%0H8N1KzN-3XJTxZreIg{#$BKxtD*0*cVW+*$3I1g8L9xvL~|6s61p~%%M
zmM>Ye*;4XC#hMphU30jw^?6Il-gi~c+qP|c%l>Vf`U^*^>lo$KdnNX8k$tlDtmXP&
zyj!!(T)oFq`oiPRx2h)p<;it9g*AIR+F$YY7N5+pzVDcoIitBFsqtx5-oY&E2iIEP
zsfEVr&0eH43u^qP(r_@d)o`7x@wp5|IVpM#DM}mrN<aaEoQ^C#S4~o&6p*5%5UEKI
z8E9-&15%XKBGpN8+Td>h^dO)HkaG_K8VRIcE1;1>qlQKTsZE=GQkg~*ZRlM9i(ok@
z(Pkc_qc%$V;j=&aL)*r5#iuikDw-1LLq<wZMMk>f&8OD2zP)al-gWrB)?-}*eG~jo
zPKWRNMmXp9dvuq$&C1f(&2^S5Z`|=v*=paN$1gu&(w{u4?0qq_{p~Y$i_y0FN#mEA
zt=sc&JhH|;RdHvD;lz%ErLWm;Y<_6)kxlt})#R4{uKP-L`yCDX-v(IgPg-Ub&$45)
z=p9L_{=O>j!7S?=P1YxzP(8iri*#r~)t^xs{tVCVmsNi=rTVp`I{7L>{T$CGJxEj1
zkF+K|Nhk7X(uK4o-6;Z45GTz^HyS(jpbwDVZGgrQX*fN0NP8MPr1vsFUQc>I0m!Rq
zOjQAjDx^QOuf0K*k70z=?}B<ik;<H}cY5k#dcXK`<-w!1x3dk0EN$I<)g>o{LtoqJ
z`ciB%vhMIV3b*tYE*q<SZi(&nmsdUSDa=}au*Ki{voD43i>zB}v%Ov)*_OZlX9YJL
zO>A2I!TrK`?C0K+$(@D&y6^h43EvCv)vh{q;t|g5em;Nut&H_4_pIJaocs`vElpDJ
zAF1+AW?7%!Vx4M*;OT8A!P8Tj1qGi{;PF$d!6^%VBtu~*H}4?9Nmi1N=6sTs<Rf{e
zU3%KZNvhR=f;GuL?bak6xjGF8lAYX^q{1__<RFStEhH+2l`K9vF2t@aOKEiSqlBE!
zG^&=Ricz=IQ`3cHJDKP96fQZnPI<Hb(0fcnq-2@?>LcaN&pu$(>++A=YM*!4GUn3E
z%WIY1Z9jH1w-@NyFV@waQ@wn=^yV|Q6CTx18h5U8*M0lVtDk*bbF+QTZRg5f&M4c<
z2$Rg!Ki@IY(&K78Wq*cg{(N7VqlB@(I50=ZuJC%ZJ(lv^(@XNsDy=W|TK9HC$n>@^
z(xU~1d@0kgA3|=Dh5R@6JRzqAM9fwQB(a{1Pg0WS&XtgsW8_uS^YXMOlgE+dBr6RI
zlJXiro(XwpM4R?Pcpnz?Kbk+kPG=fb`Z=Pdry?_O&YzlJmBr=*Y_s~;Q{j&0`l7G*
zM=!567TnsgSFKptnNh<{Og^k?oEpZ;rn_q3-G`R{v6FFVI)35nS(WiaMbUlvT4RaP
zKH_@q@2pwQriowfZ#m#@dR068+VU$Ne&E+fHAN3H)+0O4QwZ*sbhT%)@;+2ppW0?U
zwiODcH<T1gPhG^-K62GW(Ue?nevBm*NFCCTl%kkg4VnNCngIvM=|~}RG*WC0pm}qy
z(~@$e8YxE3NOAQFK(UxsR1}Y|1x~OHz<Fm>-J6nY!I*k`Ds#Rorl+FVd2?t+r*X+d
z;n_2lKgeuQ7pPbCXnUh;GEXZq?_^)TrTgH1=9TB#?z^RE>tk(Qw#4fh&Bu=wI!`l4
z|5#AByi9Q<&~jXL>G6sC&*(fwrO%8PY@X_ADN`B;JU{zo@bkBJp2?qj`N-1OEH1~v
z9sm45p`(SdzU7}4T8YQ2eCmO8a^duApY_KaWKM56$()|Lu*^DxzSWR?m!vXHKgEaL
z@(e5I?9FE%@XsDmEz=jwrow7DB`H^6AI)H_?+nj&A}cDzBi_`!@HCtE9BciNV0}<P
zFVgR{$o&@xCyb%eV3u3UBwtLhtdXUak^GAmtu&=t#9-g4n7G_}B4{!&`9<`YU){?4
z`H&A!(ULwip69bZF4h|7-y<L8@n7mOKHkCoVD39OhS9PNi$RzH6o3|YSwIbF>7N6%
zU@4%1tOL1VIiOj}0P=tlP?%c@E(IpA3Xo&124+wIEPx!W5EKC`z(!`qnaELSo}$GJ
zg~9b;1E7UVIUonE0F{6iCKN`=Dal!Dz$QT3APSu{Q&Q+8XQM?lxxNFCBa-tq0CIYA
zZWm|)<oM*!ZJ-@ch^ECC&9H9J1-bz_d@tw+1Au}8wm+ZvEt?-@1Aqd=Fc<+M2m%T&
z5(tBxAOa|u#6TR3f-yirCINPV-GJux8^KLr0_*`a@81k=0sFvyfR%y#cI_a3ZvzyR
z?f{3u5pXA%1V_O!a2%WfcY(XXJ>YZTUT`1yJoo~b0{0`Box;GS&li(FH2l)vz)fcw
z=@Fg$A-73SMb_WOQ_r)zXT!yc?b}0fQQVF<3M1PEy#5;BzCG!rcqR9g);ck6JrY9f
zNbl|<Mpz(rDDppCR>c^zqTT=><P0Xfr7OyycYlmQg9G`&YZ23Gf&t}()bqmhB-?Zn
zU7viEk-ArNNAe)G4N9I&uaQ${sfE^@Gfe8qvt;B?9;X5*#sVstja<phh0Mj^1!le}
z|D8|w&8GE&JgdGy&t)&%gUe6<>C~tEVB{j~!Y{_g?_%spF2<_*(_heCgw@txwRW;z
z<nW&N)+CDvXjSDsAz%-ezs{Hy#$Oz8QRAtlyDiBn#Vg?*e<*6%WZr67V8XF%TW7W;
z%OvL@UM{-Z62dDvTtqVzN(>3S=-WO&Em_s}7~U|Upd}s>ypoT%pmZo63@2tXB+oG3
zH&c2Lf3)D^VjPO^w(ubo$2%mtwx_$XwAymR4YQ4z-wKQ1qwD9jB9(StD}+#NV`X|9
zW|HSLfx33XI<zF!EepMcOs{4-^}LGyvhJqUm3%LkUfXQ?r%<15k^1EH3tS()>{y_>
z>E!cjo6%?f(3wg8DA}4j((6g4UBF6`&v?@-noYlem8Bjqrq?zn;{sNfTAig=Hz(r)
zSBH))TwN;Tf>wto3ky}2O1^+qCHMd7b<L(<z{+Me_UZM_%Q~-0k0i-{Ip5KyQ$Nbj
z7MeZMg7PAT&TGVsU(avFO!E27kmrR6&*2Z4<her>@TV@31Wg@b{4EduR*8%-N`HtZ
zg($VWe0|yaG7G}HB~|sDOdImbEjKVmjot0u(zhwOIWoR;=?41P{G07|dncd|jw|gB
zzrE3J_uK3Ja+<?2FMo^@8)otw{msd9e9L!{^IdlPm>t*QSH8)Z1IakI+v{B@@3_(x
z6yt+Uy_Nm8_EArBZ_E>F6SfErk4vl>@`QS$p2nJyf&TVLzb#d+yjtk*8uPn?sq$h?
z*b{V%d{gf(*RbU8tB4AoU}J2s%eB$fR8{V28jHH>TY_9)Z*;J+CO*(tE>b!9-u1Xo
z$PvKx0dILt8Q<@a^IW0cIM>%cI@r`BwAR}Nl#O|t#UWp_I40k>zGZYE7#0UAI+OW*
zy=4P^?trhUf!dBf^~MKmH6vVKRfy_xiM>5;p{&vsDzBN>_EcZ_M9WOOV2=@(K>ZsG
z5B80Z^xLWep1vO3*Di2<$@;uajj&ncuKo&ll^1PH=2exo52xCb-rle$)M;z4cabe(
zgZ=IRt}h#yvC()<Ydy7lwyy4^4HvYXYb05YY_m{Xe0^16>OX8n*T;Q*<zsyB9Qy`q
z1TNS+?E4Ga(<|HFKE9Z3=H*{_P0@$(=n%%;Jgr38{{A*W?q}K<bIs2m8|bT|u{?Ji
zm3xBXu*c>Wll{VY?RQko)zxEb**VzPUY5)+7ky2QxQ9D1GlrYXqORsSW4^wn$5Y`B
zc#^hVs7<4|M^wJwLUw{b`kVu)`qJBjF+DrZ=8yY@Z8Q?-TuigBM!W1k3yq<9^<04Q
z+}ZDjkG4k^Ys<X+i((hIIEXmmwIyTBU{Dl2P41zY*j3-sFxWRJdP42Vv2iilB({&W
z3J!;_DJtu-K%2U~^_70uDrv`y*CyCv;3M1QZt4wQv@s1k4fgde8mD_I9OB?bjMZLS
zO}RIi8Yc_P8}srnhGrw<;-IaU=BNapoCDJ1P<ok#;-lcP)g)t8Fg+Fx1Zzf9aqbh%
zY5nCja*XP?VZOJ;h7e;$2Kxg;{fWim_u^}i#pb@hHg2WX2Z_PH#xhTZE47{&4-bS|
z0^ZPwJlA6_g5#D^U$Wh{!Ty$ATwhIGUTX|DjJFQk<8AdVnA7LZ&x4{j)E@TOD&>2$
z36*25!wtd^)@oSi+3n4#br9BPPCDQ0uSbqV%eD?$zB%kTcY0}U7olZevmeViM+2QV
z(0Mzp_v$5l*?siYATI)uVYf>i6&p5CcBe~DY@x)Nr6!6}UgM&+p1q!yqxe+J+;2d>
zA1yrR-mjig8|I8bxzw(;8#YwCHjHlAfLw<?K-bs@=u2LY(t+LHL|@bXg8o)-7?R7|
z?LCxZul$(n<p!qwpUOv5<x|%?q>s5??n+OEJ??72{NQ(Z=U-nx@X4=lEaS%N+vZ>I
z{J8$h^;0Yz?5o-}*jZnF-t`~Xf7yQ1b1Ce1UjOY9Z8cz*Y5U7oonwEDF}oe>0+bj=
z$!a=v$nrIIr_LMd<v}G6S{iuMAG(zzx%Q!?hV!m>&0OEO@bxql96j<Lz=`qR>O_5O
zqxh2B>u}Re7alLuYftodfXbJ5;7cCwJqwOcxxWqa8d|<7&e0^fXead+Mt~i?b|>W~
zGv#vA{&W6#m$z0;;s^@K`z>6LB5>T~CuzUr@zspc9OfeUXIHUOZ#Y~kZz9d9O`<uu
XeawFc@5Mgh|IEmx`0IH58`l2^3-+UU

diff --git a/tests/examples/example_data/subsets_input/index/part.0.parquet b/tests/examples/example_data/subsets_input/index/part.0.parquet
deleted file mode 100644
index 5f0f7a24d5adea22cb816391407bf4f64a577320..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1701
zcmcJQQF9tq6vuA}p+plKYZpOLacyH%LV-XXI(_g$3jqU7h{CekiF<c9uw-{xVF}Po
z-hJx$34G`{J~$meg5yJ<`U!mZ!5RG${_ie{P3?^1gLme4?{fZU&pG$r1MJdfMhFVu
zhJ;6pI|38P6+*sQ3@(u61rk`k6eNqQVG;yOAOtRfFjxjJftSGwSOu?u%U}(>3a)_H
zz*QiC2zVVtK@7YB-UM;*7I+)H0}|j}AcFV6HE<oQgZIG(_yBweJ_1Rw2~yz3YWQ}V
z_#-o41tMrdFNy7~2_YdAkA}~bXn0}n6CoQfutLl}31DUmq065nR_-YA?MKO--)`?-
ztAD27mwvFm{QdN+jfa}?UGK-A%s+qjQ?EZ>iVMM5evO1;(SV@|fe4E{3`f3ST7SB@
z9{w){#FDrkA_NGgH7tZ6a57J#p^@6tErlz=>j9pHL|7~yi~q05`7YQx-^o=%V##R8
zQEW{aMSl&Ph=#4{(*t#2jxBp6-b#ES8k%@NnGk)`fv4-9aw6J_rNhS_n<hLC4c*iZ
zF53La2ky+_j-qZk?o5QQJ+_?Lxp~0)y6SSfrMrrzxXL{0>ED?zrsP#UpK@+{E~R0+
zH*^0?d2W7T#-XmbgCUAI5lw>)mEnw6@643pa4_M)j`iWl7}(sJ$)wV$w0H{ZJoU3;
zDxcaCPf2uzHk<d_xBUB#{<3s~k4d*gDcu8nd`fAK7Q3`eX_uC|o~>BCpij8*?zvv0
zkoji}FN&vEDV=Zus}}_8nNtT(#Yv}~Kk7(zO3Pii6+f<8=2)(@a#FT&%(h!Dv+Kq^
zqsU-Cs+#FhwN&e~>@>sL`9lTo%2Ll%XVsh4vWs`~J=Xr5`;WEubhJ~VhHRN8t27Up
zRd(fl_*L>5R+;cP^W2R?ZQGo9TB&9-shHOHt6WEn%tUV$(z;YMtNlT@Y84E{YW1~}
zr@OM$?8%jKx^o_PT9Ec9@Vi*MKC6v8?aVv|_F-@z*^%4RBPn}UGc}a<p22HYD`lKP
z`GK_6%&Qf?AFk!ojlQolyXC6e=CgG>%-+j3{=Kf5+%BitR+XO-&THy)>{?H?kGvd+
ziRap}HuLtEm8IGPrCk_%d+*<!?)2%nUaA!?)+n2*-Edeo=f$Y+=Q2C}x^d`t67LD#
zM7UaS#r=11>l7F6^}$)c6={!KsLChrCH_Du?@5Yt!Q7>HygpKWBNzP0ltdGbGW>e1
fyPK-xY<gde#Juqn{#POvkK>o_k02qt_=EZjHb$F@

diff --git a/tests/examples/example_data/subsets_input/index/part.1.parquet b/tests/examples/example_data/subsets_input/index/part.1.parquet
deleted file mode 100644
index be1028aaaf00e22c04ab54f275a1f5b9da8678ab..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1707
zcmcJQ&u<z>6vrnR6C&BFRdo?a9MGz@av&~X)+w&rX21q70c_)hT{dya{xU4yUAC|o
zh~<Bw>R%95J@n8+)MHOo5B(de-g~Ij{uBAmg55ZcROQf-J~PYvzJ2e_%mWT-Gs}g!
zZzJ5xr5a~&<b6WESqkUKom*t{&h0SCuSSUgw!nSx2`H>a!`sZ`Q|7S)%V*$o@CA4P
zC@6vw*adr_3@TtBh@c7{f*PoU1~>qR;0QFqF?a+XgBFm04B9{ePry@f0=@(t@C>M{
z(H)J<hrU=jg8lW<nAW%m7fHsWS4upZTlmDt-M3gF=6(%fHf}}Udyrb$Rg?SQrE7oe
z9DZ1TWS+?1J5T?-e0KM1!+O#C;idie&)y1)xBk;|k_#t<H4;h0LzcmXVn*ymH1^~2
z`fp3?(RV2zk;WwvE<`Y`VIc&8(*lV{MtaY5RHlSK3b8CCW+ak{<o}wS?ZQ}RJNbZ+
zL^>YvRM${P@!vxiyyY6^w4)E~vEz>T?bH+AGWe5pil3W0fo=xM1@Eek2_JWC+weHG
zOxx((w9O-T{F%obdDHRy84q7~?0B<l^FZsHy3g#6>8pn7tBa_Y|75<Il2!F)%7yKX
zl$Psn<^P@X+WgjxLsRt!Llkkr+m<#|hci~aH&chh!Gr}nH-{r@;4*7AyOG(*@RzVI
zQok;42pgOHC5f-lX7fPWp5J%;m*tPym~=~&(j&meLrU|s*rjDkyR_5|Y{lXYeZq`;
z*LsygW}h*FD1k0gI$;9VAPCkgr#7C7leQ$BwdFdc<u2Tc_eIAZE0tDW&Na@p{g$t}
zb?d+?YOtS)c4j1&s(mdt&1#Zxs^VQm?%DcG+!D(^-W7V9^qBdN4QV>6m8hjSwyjl~
zr<zmtm1Fo-gsfJXusDm{jZ<Uao&;K{YHM;aV;+l4M~v*mY!x!5T(rghpes5BOLba(
zqZH`AA~$<VrJQMB$DJ1B;|ct3)^5(K<F=Gt#K1l*=A(FuG(D4ZS2a^hm5wY{vtB9V
z49d^t&8DDN*nXHMWE%asuDLB=-?v|{+tb{mT;sp%nkiB_qiu@pjBs94ukBWQx_cJn
zKukPW&y88IznmghpQ}<~9PGWnH?8&QxL&FjZq}&Sy4&!yTt0|VKh9@s{knBJ?<Cd}
ztch^7!HWBD<JKuIJm`a~ek;-*vrwH)!AtCcQr43c=YqLQcY{9C=SC*XBU2JjdFt@1
hvFYE_J?~!d#Yin0KQ;eKB$7$|()|@C<Pd*TuK+oXsILG3

diff --git a/tests/examples/example_data/subsets_input/index/part.2.parquet b/tests/examples/example_data/subsets_input/index/part.2.parquet
deleted file mode 100644
index c745a39dcded7b118455891e378642c93534fe44..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1715
zcmcJQOOGN|6vyw-G&0F_>==tc!h%$^GYh5#8pbf0g%@U^3!$f*Rwzm*ZM~ohs)`yY
z8t4|^@5lJooiQ$4xbP!%df~#ITlXfK=$G(+3%VV~NsJ3`^1HV<|5N9jd+z~uX+6ya
zxziALvQp&?jyyuhH!DGnT)aT^iw^}!do4^lzyK!bf*!Dd4II!1F1P~*V7L|zdMv_-
zML2|I3?^U-j=(W^5zN3#;AQX%con<`UI%Z0H^E!rZSW3w7rY1F2OoeB!CmkX_!xWw
zJ_VnF&%qaK;p<<L`S@zFa)i@3BZ<wfb0IDikA}~bXjohLM9Ad_SRrP93}7}cgdTn>
zap{H}-~KjP{q_3pW3^}W+v0cj^S{qtxO`VLzc#-A!TR&WP5un;jh|NITriefC!tt0
zU}{_-Vnj~Dk<-<U-&Qul|D}Lf5|>4|0Kv45g%AWz=14R&R1MvhnG$?Fz_O5t5sSy-
z|7&u#3uA-r<WWLm$!N%x9Zeoae+eA(rlaYT1GR6BY-h+{OFYM$8h<;P;OC|TU)O!*
zn0I7bhmSL|EO@j{-O>)0ZS%+n-qdA|yl%VRl!vb~vfb&qxvz9})nj&B_he1><VDok
z-<g+Fva0S+xv<@r(saD5*?*=yH$O1rK$pG#07V@0mZ=Qn!IV|+PUXR%KW4!W^}*2W
zJItC+Z>Ba={0!Db>U+h_+~yWPBhgE=Ucc46Hox!a>FN_~j5|e2=^kL?X-c!S(4i$t
zJG9vGZH2;;K4!+7=X!-gW}h+qD84RGI%Wb^KM2+{rxu<H<5n}*Yl$^VOC7iso)m0r
zB$XRkF|%{1Y&Sf`shPLTf&zO_uu?;zSm`R6Nm^;<+A`jiM8i_2!d0Q<;oY2}G@oVu
zBds|ZR*Te>Y|B#0^|oS{JZT?(<y=}Rk6D~W?wz)_ZH;}cSg{nbkka=BrXxmrtT*y0
zT`X8ax8D)$yeZp_u2%GQPZH~fR4%1j=W!=_aeoZI<=XXWWz=e>7csC8lle%l)SUFh
z%vsIUl$(1dt643Va0aC#ajTwF%WOYP%cXX@b6s&7p1N(_Teqt?dzqbouWKqbODSbb
zU}uE$nz${eVyI5f&w-eDt{iGpe}5TCtQ^VB{K(&X_vWP9rK4K0l3%V-vQ%fsRWeyW
zMr}Wvu6Ao?d)`T`Cs-5VYW)@W-@>g^T)5u{XZ=>72D4C=P5w*lfl}6!6z77uLvQ$f
uq|S{@m`A20nsDX8ossTcQC;_n|HViw8b2}rO2p!E{L=jqBxDzVSAPN5#JEWS

diff --git a/tests/examples/example_data/subsets_input/properties/part.0.parquet b/tests/examples/example_data/subsets_input/properties/part.0.parquet
deleted file mode 100644
index 5217045981270e26d7f66f5757130739b23b27b6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3542
zcmcInUu+{s8J|sTXOpvYz9Vba;48-5IoCExZC`wCjvRFD#Q!dK9Oq&?hAPZ@*Y?Eg
zUEg{)j_(u+RXkJ$RXox@AXJ^8AcT;3LV~J7hzG<2FT5fINFd+=@l+vH@y)FBuQ?u|
zoR#0~>^I+h|9<n0ow8Jk3ohYbyzti4h9C>*DnjqNHl`6mDO;2A0NdsiyyOo?+@%l?
zv#P4woHsTD({u-$J*+EgaMrJCs-bf)b0I%@h)sn{hPEHr#I!V(3;Ju=w2Ujha7x1i
z%OC@li-ywJQq4Y5Ia4gE7VaAg*UF`A3#2VmRXt$8rmBkOo|(IcyS9$Gl~4)<Cebu5
zJXa=4TOD!4J>xYj%izjBA9rc4f-MVM?iCRxqzN-q-0rK`G=|(7%D893ZZNyv#rlAo
z_DiO0bh*!oWzxmXj?K-(c0woJ@HS*M=$PtI@OdHT4)@dRC88K6)_?!zm$~n{XL$kv
z&2wJ=16x(~#5_W;UYke86|c8T^fu=Wr8TvMi8&IyNPc!^u4-$2ayY#s>X56Z4!E7`
z1=4P7qrTm0akHrAp7mOE`s;;;En|zOpEWe!%-p7;5D4I!KdTu=kJ}7aiKg~7yQgvi
ze;J#2z&&zLyVT~5Vh*@5bZm`jGJ1y95s;t=oduxQFL(q`=r+3PW*$U0%OCtCem()C
z%y}-q9QLl^Q10!8jX$iGuNUvDuhhTY`{JLDzI5{qg?zK~y&q_QeK!WW2u1(ShXijh
z=|`TR=qll!Doh0Az%OP3f99h<n~L%m9R(NQpdn!b!Qh7nLh%0Hr0@Z^A-u#6-tRcq
zBAVvV>hzQgm3=&#_wlYfR8pdn1(19eun9;2ZUQ90=K<S*X8<n(J_EQ1$N(CECBR+4
z7GMt$1>}AF>VRc}(AUOKh^s0zPD>^7$15WL%vdLYZazT@i_gE~6w!S-{F?Oo3jg?9
z?^VA4<L?B#vB&TK@$qkd`PCo&_PJO6n1}v?>ix|Z5?tfzP2|d~c0Y6n{xKQ-hBx}p
z>Ck`S`iW)zx<`0>^0=R0n{;ush!eDklaRa%;0AaAQve<?4VVF30(b#Fz%1Z0zz?_r
zxC;0fU=APv0)USLL_iSm3BV@-A;2{NCFE0pFyPaG2!N7v9dH9ci2#Zjk<Y^OIl%J(
zO7tRt688cS<s3PL;$sp%e5OVIctzy7u?{8bq7)YYr_=tcxifqbh4J+UA9(DJ{^e@)
zoy)-2MPL6Rf+z76+(;tPGiY|y9u_=E@ZOkkn(7AH2tmw(3j}aZ#uK8TfE)}3SN|_v
z_;e3r^r@CE@D(v2NBO`9SLP53E{L8!T-<md{>1etLg0((aIa~=RjCgmcfwzY5GC@;
zLO9}x_Sm}0RvtxkxWPb2w>1qkT13^9z4J21@}4#7Q^km_lcu4l5zyA{UVn75V95Jw
z)1vYoT=fdJFsMI@s5+>_R|DO<b3e2AQsn69G??>IMCg4=Eu4$vMEa2;p&1@6KtSW{
zBDwNKPmb(Eeu!?}UcQ*`iTF&q7o0%lFQnthKB5y=LQ}<-VX`u7L^iQGqE+sXunAxM
z)bW984v3*sX*|9ZTZ%=X1d*}pvx`f~rCX6BBzmPvWwUn2=}7T?{ssDpOKXxOZ2{=#
zo+K?xskW4nq_&i9Gg&HiZhc6F>nH2Et+ewFBeTNR^O7`_U>`i05qPpqFzy0U!`)i4
zyIU_x(%KOAr(VqWG`o?lF4q&K136c<WW7i>NlFHJH?PG8`E+4lP8`PNTC#;<Z=>GP
znxp*fe8z&k$&Osxq58H`I~;7JCDQ0=nw+h)<X*;VY=d4l8JDv|YG>@b)KYTVkgcT)
zak-~~{fd?+!#3E9%k@;UeyfsfW~p6?$ce&(My(Wc*5jIz%|QH>PJZ8jUu*?U?v)N2
zS;&FLlrB`O>$}!wy0Fr!o{Rk<)vw(ekdwI5h0WbseC)T;Z`2OE^~7;L61nfh|5W0Z
z_3A**L43J$@#J^XdFro+Yt=y`vtqOOAnts(v&Hh)QysZi=``ajF}0r3Xik(rWo>s{
zpNsVa&SItgEke1GYt`6pt<q7l>nEJGwuiKir@kofjEjpk!8ONDGn;`klX-ZSr*nSv
zs@2SCXL;_+`c|Uk<YzU2KiD7JJvkqTD|Lt?-z$w8wM<ODm8bE+`8w?H>V-~I@3ON1
zHep^kP)4jyiAKHf5Z6|0R)_ud!;O8(E&}&7*V7mqT2n9e<-{_xQQTgRZ|oOI%jrDY
zgHnqQYfJ$BB}qmZ`jfWPR-Or=Dd`Iv*+5$>t94=14sB&7+o9uHar~Z=I!xN6BlP3(
zhxV+r%?!5b1pxiol$eldIG!K~u-2ycL5}t_e6~)FtY=t|spI>AZDcCoiJfUj!nQC~
t=l$+oB!>Igg#R(D#b&?1$S$<-_`(W1S6Ofsejzx2r~tqG%kUrFzX2YBUC00c

diff --git a/tests/examples/example_data/subsets_input/properties/part.1.parquet b/tests/examples/example_data/subsets_input/properties/part.1.parquet
deleted file mode 100644
index ac842a070982a6f00c7fd496ab555a5916e220db..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3526
zcmcInO>84c74FX1iJdsJ*;U$V3xhmn#sqCL*iM|?3>iq<j{jmiwv+g8cF0zDm)#xr
z-?ZCtoKd7*0n%uNW=9-B8X<&sVTDE<_kdQb6%u>dJt9tAKycv70dav>-Oe8w4zNtg
zUsd<3SMR<0UcIti;c79)#r)pGJew*qB164GQNMI8Oi>h-mb(hJEceX%plg}3J_)iC
z)`ylvGi(K$eHm%)nfd-m90)yn#*@_*ddB^#g|r?Xc|f^h$|^<%_QG@4)Q5^}Ntg}<
zIa5HItYP{NThSGHh`RJm_l!@HO@*0VG?68_U<JpUh+bm1kci;Pon{r(L&r$<_I-k+
z8}tpTL{C#acZ;6cK&mkfOOjYo`i7|wEc*J*2F8Z0@gSnFhw8FmnHclfeHh%unuP>i
zcYnz{>oW&fq3_LSu`Uio_s0ua6CGj&n{JAt|3P8*_vb3QHnQA54%Uz%i>NQr@6Ycj
z`q0q5Kcsyg-qh(ogi5HZVO6#;Y-HAB=^Fj+%?vU-*b=0#iS&J68qD8%YaRw5^2Us3
z2V?iUY#I-{y4I!tNo8+HF7w)r_g#Y?T@CI?5VeR++80bI`lxR~JYSzfeM=e9pZRiV
zpuNn@VXccy`krrFlaJ^xvuPms7-RZvUv6Y*?zcC#OvLjelE^JtgG`zX1^Ja0p_^kS
znaSWPwJ?2766+#_gLeEk6VQwF<VzbNUm68-KV2;TalLZ0w2SXI-&5cC%gLJyk45>Z
z^uaUbuOAscP^Kb3@CF%QATdWx23S`ashFAYi~bL${onIOem52IJl|Ph5sn_kOi<9x
z!9Y>){$zrAk1jG{`zSu5UDK48roQT(a#0Jj9*P6R0TsY&fHoii$N=^MD}eP`k4q-2
zzT%zQpq5C0(Ds~x_^LDQa~fEWJf2}a<PeeOrxu<gg^5>x?vUetDfEbYhyC-T2T!8E
z2*%%8_+&o*I4QbU%ktw7e>?Hr&xw`DhqFP(HD+ZZSFt&H%jN&kwaA}lB2TA-|Ap)4
zQuUKb=GnE=s(o_J6(^fsCR7m`5`a|zq3LzN8qjiwXnd1s07=wcz_$S325bNbxk*3@
zkOpi52(ei}4v+^F09$|}padubwgDs|RX`1}1K0)Z0qTGTpb2OJNW$&`-T>SOd<Q@h
z_a*?%de(Wu95lfhNV+baTGn$q!+PRl9g?oAQkeXo&hpRZFJw&??W{fZ_<!Y&eDX@<
zZ`UDfSAG452%eX<KrunFlYK$LDq<!n#&>JNDVbYTF$gg;E)c+J89&G(<rf3N!218K
z#$N2PjJ#0#{Ga;$V#MSB{N;Iy3M{gd2GZbCW`FH^7?w2=AMFdeGElXC_)h3vSQf+g
z7eir3v~SO2d*)$SgF6azw1J|4Mn}eqxPMvZSl+ir22l(TG+EF^90qM|pc<pI37tQ{
zf<@#iw&2UkLZJRIj5Sb)i<hk3+s!P!9zHoa59V?dVRB;<3zy<JlYXH{$PP~yA)xW@
z!nw*-Pmb*0{K%TMx^i{DXW|Rnz2XFzzp@=i_642T6q*=Wx@ps_$ndRYjtI-f2$}FD
zNE{!MUnjaoq_NoT=<R42C<u>TU!1(1xV;=cp;#YRt8F*#I9191-SZ97VqBWzxGI3O
zEsk5^l3gytaa}IewPnfVrTHNdZl2BOsww9kdfSRUpXazC2kT(8jlgIxg1!Yv4qJ^x
zuhlGZTzUxWldt7fWzf#nSDNwiA)l*TyjGI8<s=XCUS5gz^QpoCA3uumjYJ2*+ICY?
zgi(GqpRr(VLgE{HM1LSQj{3zEC%08a;j^_4uV$?F4(MeQF+MvacE-NT9Wkd2?YUGT
z#;Xe0uPJ$X*adqrzL`ukmum?jOYCwoA1^#;H_B0GKBkD-48&iP@&|esj|vK}mJi!m
z*aL|vRjAiDTh?}}u-2(xiv5`AH<tVISzM{YcB>H^`)wQT#!;^sKiv-}?>h0nkhpoR
z-sf`=U+z*o`Mp%0_)}4%-fw5t26lW9cfKc8?fq*>iC1fq5L=7lW>O)0BKZ^3tua4W
z`GcHA%Li4N<c4q5qpe0w60@6UIqU2U36JN#NZ##ST;&8c96Lfb17{|4d@)a#{AhJc
z$SD`|Z17q&UUv3pwIP47KH54vA4h9Vh$F9-N9{%?$}i_hd~m*wjFwiA1g&SE1+WRv
zg+p;<^AvA43&*IjHn4d(*gPs8aDx)$p6vBJ#<n78WrL5e*fvT#E3x81N$xn6M{1Dk
zkS=WtpuRXxuRwj0a;oNSArvKfv3t8CrIl5BuxO8zvMt*qeY!M$PjZqi6-W>Dc-lzK
zayz!cF1Y}pJ_XJe+8T~02m-8i$$gL`^$e|gZe%lK2bny*5A21u3V5>5v?H;X*s9L^
q-MbVUGLZRTfUPCLFqZ5KEi}HcLe5ndSchK;&W9?)->eG!gZmecoorbE

diff --git a/tests/examples/example_data/subsets_input/properties/part.2.parquet b/tests/examples/example_data/subsets_input/properties/part.2.parquet
deleted file mode 100644
index 1d7df89dbdd9ac5c06af6e7be741f0c26d523b58..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3584
zcmcInTWlj&8J<aOubtSeTRhfSM96EJXsafvoy&H!%1Vr#_!c{HoWyqAE$Nw=W6#8M
z@y;c-vxJsAw6L^vxeB2Q2~>fE5aIz4tX8P-21pf8JRl?<ctAq%f>59M&x~_xwhvHt
z<nPRx|9<}e{Ab1$u9{*z%u500cb7_x#88h@)UQ3aE>RR!(-liM_y9~-6)aeWE@E$p
z?c;V^u&r2V!3MSuxEY(*HB2wFWn}0*)S;iA&dZjnYYN6=6cq{u1qo^N2jV4J=*X6>
zwdwirn%-*3f$?A+<HOK6Ta^ZqZfNvJA}|wmjed&FfRc#^^f<eXv^G`{eJj2OGh}uo
z<eiP=b;HDnz8=}aNHh>V6Upg{h_N@r0;MBsZErkmA!A_E@s%x@s516LxMgEDUY1qw
zZ?A6&153h&2Z+Ui)~A)p$HIuW4-e^dxQyB=nj8sNWX+VlW07rS8@lOTV^Le~AfrnM
zCvpR1AW=8zYu8JtYar7LhE`<D(&<2O6(3;l3y~V?>IM${Jy?WnhQ|F_1w9rGS9D|0
z4qS<Bs7M=F-XDZvVp@ThBWvIu)a}5Sr)$zbdxFD|btG5=#rw~QrW*?C(~}dNVabB6
z>GU@vP1Ge3T#1yyzJa~t?23UbS(C5)G!C=2t|8foh5WLG=^M#1?pZy3lq3Q77>W@Y
zX&Q<iW_*lq;u>|;=bB^3PW^)H+?yjXO0@6thvUH&G?D+#bm=e4l_$#E__@aS)LVZ&
zdj9HNQU12{{U0iSyWgGuJxEXeC@{eUV~b(R7h^r^NX5)ZREqw2Ec!-Z>R*?p{O{&2
zHVxa6Vn!$!!f>M~cz$t|`4e4YlFs&gKzpuI0h(G4T=GzRAwM+-XaYDuCFJ+Wg!?w(
zeh?_n0A>JLz#1R|_yiyc_z>WAz>|QRfM-Mg<$Z8TF44V_IBHDuG)=7k?IDNtM~1T~
zb@d@sU_SMRn>z31xF_{1))O%+ONE)&MxOufy|=#Aul%xF6rW@Z{%`!`kK>TDufK1C
z@eFe|lCL-!`-dm`*67rm!KvSjP5c+GADYGoKIZ=DahJb7>JdqzFiBJk7T*R)02#0k
z=l~Re3eW&LpbNMSFaRc`+9DQgV&MQNJwP8Y02~7D06q#h0(=bc0^mi!#{r}Sp9Fjg
z@M*wj0G|bX4)A%vOMsUFUjTd&a2N0;z$<`P0bd5Z2Dk_K3Y6`u!%}>0cz@^aX8p$;
z)_+`DQnrg!Fz`R^_<PKm>dB&0z5D*?1Mk$2-#_(-%TT?GzTPE*hgC0DTBKN?DM(mF
zjE`c1lOt|pO;V)^h?(&K0d~!BB8ya1icQ3p|Bs#cXgBE8BTZoRE*q7m{L!C34ow@I
zW_?|xiO6Js;klELH4*ps1YNOJ%}m^gKbw%n#B<Z}giG3U_}JmxNoa7EfsAG=3P`kM
ztcZK(X|ClxYtSWv30so|cy%T~TC>&e;Dn*``|w&NbQN1jL>2<^I|;0TIJ~`x?#_00
z=6d4j=row~Q6$L8N-Ug<<AnO2A|We0nudUet4ri77d^SOclePt>)OJ_^`4N=Ecb#F
z$oz%nxU~1^#7Uun5qz{bX;x(3fR6)`<u1I);md_MKEQ@4>l&e^QnSg~WCBu<7`i^o
zoL!uqPaIKfkgHZV>NniJWdH4dnp`Pvh2ywQ0J+vUZh=d;xh%)ExlG%krPJs5JwjYP
z;paCq?lX*z6^CEoxE=?3aCeNr-BE&Z7m)7l))zawjWWlr^guuTOhHxbX0Eo-m|H*K
z^EHdt%JPPs=7HWRC`q%BDem)g{S;qcY$4EXHY7zD6s{Gr7U(WYe0_(=+hV<MmNJ~&
zR27BKRa?B8wVGQXms?EnxgN1I^u68^^GeU*Wr`_YRlt5#DagGx*h}$^^kQSax+vs`
zT~6lbinp8f^`y&BDPk@Q@mHn7zTU=zqQa}|2hAL;fy9(4)@rM})<&kd)T*6}{gBAl
z=S}$}u1s-bx1JjMZFZaWey1^aydF;8cH@5}ar0Wu<ns_;{#-nTolJrFQ&GKUHnU5%
z6CcD~=t!H+`n9ygt5r!zEhTXyt&la5`iU#M!~9&#AJi<lzP~Ax+VJ&Sa<^WU#N6sh
z&01SMlE+hDr0z~FF6IQ)T{}W93wtJec(zXG{Ae{x$SY^-+~u{+xpjAaRuk$6`pMms
z{n1})KpX{iebB6Dll**v#0UGU-`&-UlAv{*y#O|0UOW&7PM+qPjp8AyFWF8W_E-C*
zea<dJ?a5kCV{9sdw%+CE791Ppt%X!+zbv=h&Lcg@waBpI5TL&}POm_Jl6KlGI7Dbl
z^5BflKw2xObU<l`w6a6nA>&GU_?+e>hboW}`tkT8J<Dx52HWHSfc_LXhv-PSo*)RY
z)+Xmcp7b+Zo2N!rvrdrd<MY5#bVR_Dv!`8(qvD9V&o^&UY`lw%+cvglgl>1nInd(6
Y11s(xWwB-WPH=xz1^x+D;J@F00P_QnLjV8(

diff --git a/tests/examples/example_data/subsets_input/types/part.0.parquet b/tests/examples/example_data/subsets_input/types/part.0.parquet
deleted file mode 100644
index 6074b2fc3bd605b27f527b0d1b07e7ebc07c0fd2..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3018
zcmcImOKcif6df=o*l`-$%}_{bOQ}QL)^_+9*N(i%00u*V*u*eE6FD=?7zWP_gpV<a
zEOnVh6=m5~7f}^eU3A%H7gcwax~RJ7qN`MD7u{x&MHfBq4JP=LN>x+Dna{oVop<kf
z?>@~2m-Vw2_Dd7{#FS#ES!SAHwyupb3=?k(t(FB3F{LS+rnY#YEjLH-9`01ExMk{1
zO;Ana(T%#&s_QnG)>OINZYpJyeL3E&cXWyH!~~_;V;OdxP0g_)JIPvEt80mw9i@bh
z5zrxO=2J7SqRDcenVj+jg26~M8qMu=bGh7usc4Sz1<BOjU(GId!Wo)mtWJk@tJjd-
ze%5Rk?LV3AKbyTjn!L8-)Xq7S$FODwmr2|h2Jd&+J1KUY7VDcK#TYf1#xKOwo}yPw
z<F}3#Fi-?Ei|q1HB@44wW3*$GmI}6u5>eyq<>$d`-%XoF#vp29TDi_Vg4O$U^mqvY
zTEcUa{V$XEsm1&Ih->Kt>8WKrva*jyQV^A_+Jg~`i5W97=CLytX5^fWu>hlh6*vRf
zfH7bkI15Yw=YZFM^S~r91xy340~Y`mumf)Z4!{Xq1l|N(z+1rEz&n5&co*;h?*TKw
zCBO?@24;cxfe(NWfjM9vSOBh^v#t7|4*B_w8(A=kt-es&VLS9VY^HvUotZtw3TNPZ
zV?SBXUvp1H1=srHxzul~8#C!G`F7!ncI%IWJF^cZ<@4%SU#ow9YfSC^Y{JD_`n$`l
zNOV~q+U#G9dcPU>KC`<1MFyv>mopV&9M)F3Dr*93W!Q;J=EF^ViAlMT^^hrWXtE#*
z$uM@&>2j{Vk`8!fd+`5N%>K)SUG&=Q&&Dn=jC0OmZ3wy~v>ZQJ?t2tnlJ~dEb+x1E
zEzec=#~wxU+@5oL4AVCC%hYq<qYIjh7`>yah}cnNRoXskGnlvAI8hMj>F7$iF3BE5
z>m9Ap8#wA>4M!ZYYjRtVgtmb2`yN?GIL-|+Hn$a<U-ldvyh!F~79Qi+A{<9D8M6PU
zY)F*{b4aRRp}`w?$$ZrQYUjg3oT$yP9h*nd+e?ci_Nc}K^N_@}D5_{v7TWbDRb5rY
zrqJvW(HlLX*{pX7*`C~NDRrG#{r&~tg3kjrJpHMMoeQCbpyz;bOmNxk4gRXpmyS=y
zu8^O<66H8<6Cl6q9Jk1YD_o4@DqN&OZQ<|{f0r272L1#WG2U^dEYzRixGslrxKk3`
z=^$K-K)74vL%YR78pamJhd)YaYNr&>FBSrsJ#jtX7WK4pLkWwp?<Q1VD-lW7#K68^
z<U>0G#+C|IwcJZAC1P!i4OK-xN8&pYzu!tlIHjbisu<7ih+3>&x{0`W$S=mbgr`4u
zW=C3AyVMs+`bA9ze^yN>-3s{pVj&zV1hb)XobYmr7)ahN@fn}t_p4GohWxYDM6F(t
zdr4K)GJB;sY9Kj9lKK2vv3(<wT-nJV$^9OQ=YuU}kXIy`lM7)r5nWSG&HI?Y9FIY#
z*uCN0BtFzA23B@a3#ktp_`7m`#YcHDUqV|^1T9~aBGlh5A#ZT_iUVEtS5gtO28TLw
zN}g)N^^AH;jS|1VTL`?wuZOFmmaUfkLv!l+j=&R5a<nc{&$r9#>agZ}MXjM0gPVq*
z&-;pewkpNf2J5(UvrDw?6nVc&bfbHLuFuJQ1m55|O&{zRU#8Zd=VTrYQQr(?{z01p
z@<?czUWt$S#9)Ht2d(xSMLk(9>$`M+pf)@w_oN=xI8Z7i?+N@$hvr>d+fUWFP8$1x
ztnrH+OKMrqG{nFn<w)OL^rvcRWoMHkJ;}-B5<ToUj%#e7U&-rcf_B<E@ef)TkEGS&
z>MpLd|Ik<nF`5{*FddYsfn*u4YcVu2tkh2*$!otq`+E{JX&S#tMuRWTab+VpH1}Z8
zX71E{D0PhXP$i8BV+akUGcgW#LulUZ$nE)ZqcKkpHh2HvavMjQa}{3;#-AzRTXO?n
G@qYuK0`iOi

diff --git a/tests/examples/example_data/subsets_input/types/part.1.parquet b/tests/examples/example_data/subsets_input/types/part.1.parquet
deleted file mode 100644
index 9d20d681fe5505a3fa1a79e52cc60b9a43283897..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3085
zcmcInOK%!i6dnx5B(WPO%21F{WsMx<h2z3t96NQ}3>bzXfDJYbkfhGMfWb4v1O{Uq
zS$(XcDypLHx{0bP$|8%Xt1i3hqH3$E%c_g2Y8PGf4|G-a+&h@ygjA}Uj{MEzoO93p
z&Uf#*!dpxsOuEP)9pn#=4Cy6_^8}&Iy9t6|+pVr4J5Fo4R$CXeNigw(rL;Zpl2q#r
zOEv1Q$>~sAnF7mJr!O^B$+0lYwuMf|5m+y@q&*ay&=kpPgKKkjtEF~Y2Fk=XS{;id
z$P}4bBxQ1zbd&DD3b8PWr|@}>2SMoky$QI>4%b=Y92ME13dM+2qk8Q82flOEC)DG^
zA_e~7eP@82_D5$4x8LX9>6^-2n4Iv+-g{2(U1#u5N6<4izkd;`C&&o`ZnN+p2>AZc
zLEZ&5UcqW4c19~k%GhN%PWx<$rW_Ym?dnlw+m@nej@Lp-tq%e@!NN%jBwr%wkxEw=
zCUKz8b1WqUzFI~z`G_;`zx&s(PUlqW<J3=If3bhw5&XveY;iO$?%{;@(M0gO$>2j*
zU}a3svDJO*CZ9MnpgLOX`%V{4oN^G6Q&TQtdB#IT0V{yZfK|ZSjK_5aMZAF`*1_^7
z;4Q%0fU5uo5Cg;k8-N5L319&!00&3|t^qQDEFcHi1Z)BFfC69}umdOpN&p^E22=o5
zz;(b4z)iqAfL*{XfH31(7eP35N%jM+rc5@T3hwh9dwd>x5aT5l##q5ge`W7D_t~p+
z(;Gq{_1R+PxAm?0?2dA?{7Ap?`_Zk1k7V^h<M9*ik1x$>lrMOHCP2Cd`)@*(@$x+I
zct4*EJ~<QoWh(G57>rvre<n)!+#RW*=mO~`$mxp{$J_iOkqH2U5fgoEazPZ3A-u9b
z;9q}vJ@OJa<o_$2_xl;I9Q1g9K7D~8{EI%fDHyWQ@qO#ML#c+W9PCOht*aXy>dM@8
zN|mXbi*uB1+Qohad+tz%peqn#bTthkYO12iyC-dS=3NVp9tfnmhAOpWg@S0KtDF6y
zqa`-sKty(3u>@JL1PH%FDF%eYiGjuzcaoP@siUK(WKOc6?DGxfIKgDZ{-4;ODvuU{
z)SyB;Fa3h~u>IxE2Mh6BZMN;1c~rw%Sw^uZH6EHrB*sNyMcayCwc1#9O%>ZhyN^US
z`$D_j>Y-%&O1q=B3}g+5mqJS+3e=zmQ;#~AqDv9#i11A_g~BF(#cogEd#7GUSGc~x
zFiakRu4@dl%*5(Ul40shypC<L*a?3R852W4$HeV#xZ^C?&oNAofpPG}N#KbG!My^A
z^(uUHuTsu}u><2{mpNVQR@u^Wna=HtsgflcS#?v5iD2L3v`~kOr<)>u5El7pO@Oi0
zazm5)+zOYpU~IG@@<kNimHC5CCeEl;UDHIiP!si}Ro#X-HX0V$9?CPAJ6DraS`Yi;
z>9DA4kiVdDYOfCY!eTiVEk_DbiA8xCRix9mt9&kG`@@>dCV_vU!8KcTrJvSBJ-1(F
zp$5b$o-UOV6>Bq|UaOT(a6d%xe59iedBxL3r5w|^jf6Uu_cMNpO@dCz!%=QBA2loV
z+8)$`>H`hJJ*Bi3!g*mnPG3_6z0{QB*l$&VH{=LahPoWAW#VWJj&)?z64r+4+x1q>
zDu1w7reEMUVhvF*G^FszoJOfD@JN#)u1hpZmXy*)HQ%r3rdEmMZ9iWMRro?fW)s78
ztZny@w%rOJ){t&^FW~igo{zxWdB*7j`z4fX4(55DhbbC)I`<FSlz|7Q=lWGX84@EL
z;s;tCm=z=4kc>UNKcF^vPw&fptTA0Jrw;{wt&4d#69<_l)6K$uKx_PzV^x!koGH@F
zI7fDSIh<)`)molGJ;^BOwsG8V3}bFVzoO4PhdV8W{KM9TSJY~(wg-3If6!PEVmC3^
zVt7!l2}xGrGm(TQ1}paCSM)jP&%vIAG+7*<N24Jh%P^8n4w`#7C|?}Q2d7Tr9;#;{
w!XAP{@l1qo&J^0WyNY#5GR;f)V4E8pTyyr3=3j>&4fY?g3%@(J;2(j%0ku*TeE<Le

diff --git a/tests/examples/example_data/subsets_input/types/part.2.parquet b/tests/examples/example_data/subsets_input/types/part.2.parquet
deleted file mode 100644
index b89ce72bddfee44cf89cfce47b605c86b1d0964a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3066
zcmcImOKclO7+yc(IK@#5>KYqaB3iXp4{5NSI5brbT|2R}uI(mn;<XJ`*q6O)@2<D&
zb!;aDN_iAYDdkl-RTT$>df>p3N>!0K0TNe4FGw6f;?4na;GbE?b<z|GQ7nJ6JO69`
z|C{+oyG$3Nq?`Q1LH_2*l24Ju69lof>LmzbQI!PCRD=<DPn%q;<rx|CHQT!Acsi6)
zYO<y1HHZIjzG-Tl>Ns$Cxv8|8x?{S;a+YL{g4c4ZC&-H6g4aa5<`_M~2$CBnDa9P|
z&J>zL!vnJgldCm#k|3AJ>?A3Zhe!|U3C$A6MsT2j3w*)s$Q37yvcr9lc$}JEJ({4a
z)#|0O%VKrGlevBS^w_uGcL*YU!xbWZ!T2G<6AXA(dxkU-C7piRf79i^?h0RXhP``3
z2PdI^f^-rv9D)Zyz+5HuEa}5+%o;<rp!8AVh?Eqkm_3hUY+3CA6{t$64=Hfo5N(Q#
z4HcBwDQE;cg58rP#PdO4;ODRJcAT!!Q_gYWo_-IixaHYfMGRN*m&1S28UAu4{JA?c
zyEovzh1~U!cU)PZ6>aQIms=#p9E3DB>L#k=UZMt&0ScfFXaH1z2G9XbfB`rUFafP`
zuiHWq+9<*XNIHNnpa<9lYyn;bYy(~bybQPicm;3~@G9Un!0UiF0G9x70^R~#2D}Zp
z0(b{-74R<L8sI&^`+yGs9|Ara_s(Aj_R#NR`++u4B<1^*2fRC)fLH9>_=#itNI@C<
z#y(%3gQq5Z3tVXF)5+{F^UFtatJ1mBXWH|>ZohEss;JzMZ+)Tue#e;n0er%L`a-0;
zf7+aii1+A@*MD~;{K;eCuSP@vB7^;QFPM!J0Z&ViC5<CJ1nE2K+&R=oiEIcm7!t*H
zL~gKxWC*_;3<c*ONk=@Y1Nr|F=KpEjFNeMU9}gTRh~Q+vV{p32wF2L{w<tvyrS7`W
zRNI=~qE1dcM=2t8ZgPUMMeA5EVa*n$bD9J;dRtY&qN+%$xV~FvM_#w!f&oLStt&!P
zlqj&)+nUiED4Kj7Zc!xHB#RR{iv#m5O47j`E)C?nwwgMAg4*7`pUmzoDEk&eF?MA#
zB>zv@penZ~A*p_acHG!Q*2D5gyB=)B1GU+*J?kjCH9LcBcXK=t4^fPZ!i<^{XEjaC
zx~lLdXZ8^3MvpViW(Nh^kj$3S)R8nAor+9FC}4x?FFmZBicd{b+eE-e7Yb+ClXhDM
zE{r{ce$m<jP17p?^gB({GjyUxr)auHCu>-iNbJ&gkZ^IJXXvCohY?4?dWNPuH2A?2
z2Z1Mcg0T!pbjobJQ7+{`*aH8=vy7&;E9v4)DVE>hmx>mz=ae%_f(LnnQ6nuTnW^)!
zZj@)^RStYBC0P}E%q)|#z&9@Q>>9Ffi)^=*P0~t5Q&m1)sPbCMs+<M8bUezZJ19<n
z?R-^SQaf0e%tU!jh4=-PQ93n<7v)Qdcxk#27t$y$t?;qT`3jql*!rj{rc;oAL1yaB
zn$*jvyq4dnq@f0sQ!-O5E|#q`$;@1}xGVQfWY12wltEs}%$ih6sLaBmvTxpd^g=oX
zJf${=a}(LPQI5?upcYgga1iZC#kmNM3+pl3oWg0vx|qazs{(mLj7WLFOLs1tM0>Eq
zBdruMH+0Xgw_;S-Zle@?h+a>~yjGBf=+K&avCXlFlQmoyuNN(0Ngd{VqpTTfd3wdx
zv&Bf6Ey!Yeaj=iovmL~3yUa#a#2Y>fcz+(uhhyzH`|$(kC6cfA*Lg4xgV$GL`G4T1
z1bHx8zE@#W5q_FM`2km5qpW9SL2uym0ky$<W<%^@j$@TlW|L#*+Bom}VmDi-+c`K7
zXpisbSWyK%Z}71h93yvjCYr71l<EqNdXkpVu&~o_G;J(HzoO|1gF9^r=?AR~uc+11
zY6C{xf6!Q9VmC3!64)tUhae|lx|o6{1}WC#S2XSSXa7t>m>jlWLEaEAP1Ay%95na9
zX=QESd~oO#?x9)^ENmBSidP~669#9VZ%fv3!7z^Fi*2HRaZT7)T5ujd7VJO4!bj&a
HeD(hg6`~ko

diff --git a/tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
deleted file mode 100644
index 389da55a1..000000000
--- a/tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Second component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-
-  caption:
-    fields:
-      data:
-        type: string
-
-produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
deleted file mode 100644
index 3c996e9d6..000000000
--- a/tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: Second component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-
-  captions:
-    fields:
-      data:
-        type: string
-      description:
-        type: binary
-
-produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
deleted file mode 100644
index c02abbaa1..000000000
--- a/tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Second component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: string
-
-  captions:
-    fields:
-      data:
-        type: string
-
-produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
deleted file mode 100644
index 3cda0cc6c..000000000
--- a/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: Fourth component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-
-  captions:
-    fields:
-      data:
-        type: string
-
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-produces:
-  images:
-    fields:
-      data:
-        type: binary
-  additionalSubsets: false
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
-  some_list:
-    description: Some list
-    type: list
-    items:
-      type: int
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
deleted file mode 100644
index 091a7d9d5..000000000
--- a/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Third component
-description: This is an example component
-image: example_component:latest
-
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-
-  captions:
-    fields:
-      data:
-        type: string
-
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
-
-produces:
-  images:
-    fields:
-      data:
-        type: binary
-  additionalSubsets: false
-
-args:
-  storage_args:
-    description: Storage arguments
-    type: str
diff --git a/tests/examples/example_specs/components/arguments/component.yaml b/tests/examples/example_specs/components/arguments/component.yaml
deleted file mode 100644
index 659ed0026..000000000
--- a/tests/examples/example_specs/components/arguments/component.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-args:
-  string_default_arg:
-    description: default string argument
-    type: str
-    default: foo
-  integer_default_arg:
-    description: default integer argument
-    type: int
-    default: 0
-  float_default_arg:
-    description: default float argument
-    type: float
-    default: 3.14
-  bool_false_default_arg:
-    description: default bool argument
-    type: bool
-    default: False
-  bool_true_default_arg:
-    description: default bool argument
-    type: bool
-    default: True
-  list_default_arg:
-    description: default list argument
-    type: list
-    default: ["foo", "bar"]
-  dict_default_arg:
-    description: default dict argument
-    type: dict
-    default: {"foo":1, "bar":2}
-  string_default_arg_none:
-    description: default string argument
-    type: str
-    default: None
-  integer_default_arg_none:
-    description: default integer argument
-    type: int
-    default: 0
-  float_default_arg_none:
-    description: default float argument
-    type: float
-    default: 0.0
-  bool_default_arg_none:
-    description: default bool argument
-    type: bool
-    default: False
-  list_default_arg_none:
-    description: default list argument
-    type: list
-    default: []
-  dict_default_arg_none:
-    description: default dict argument
-    type: dict
-    default: {}
-  override_default_arg:
-    description: argument with default python value type that can be overriden
-    type: str
-    default: foo
-  override_default_arg_with_none:
-    description: argument with default python type that can be overriden with None
-    type: str
-  optional_arg:
-    description: optional argument
-    type: str
-    default: None
diff --git a/tests/examples/example_specs/components/arguments/component_default_args.yaml b/tests/examples/example_specs/components/arguments/component_default_args.yaml
deleted file mode 100644
index 816211c04..000000000
--- a/tests/examples/example_specs/components/arguments/component_default_args.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: Example component
-description: This is an example component
-image: example_component:latest
-
-args:
-  string_default_arg:
-    description: default string argument
-    type: str
-    default: foo
-  integer_default_arg:
-    description: default integer argument
-    type: int
-    default: 1
-  float_default_arg:
-    description: default float argument
-    type: float
-    default: 3.14
-  bool_false_default_arg:
-    description: default bool argument
-    type: bool
-    default: False
-  bool_true_default_arg:
-    description: default bool argument
-    type: bool
-    default: True
-  list_default_arg:
-    description: default list argument
-    type: list
-    default: ["foo", "bar"]
-  dict_default_arg:
-    description: default dict argument
-    type: dict
-    default: {"foo":1, "bar":2}
-  string_default_arg_none:
-    description: default string argument
-    type: str
-    default: None
-  integer_default_arg_none:
-    description: default integer argument
-    type: int
-    default: None
-  float_default_arg_none:
-    description: default float argument
-    type: float
-    default: None
-  bool_default_arg_none:
-    description: default bool argument
-    type: bool
-    default: None
-  list_default_arg_none:
-    description: default list argument
-    type: list
-    default: None
-  dict_default_arg_none:
-    description: default dict argument
-    type: dict
-    default: None
-  override_default_arg:
-    description: argument with default python value type that can be overriden
-    type: str
-    default: foo
-  override_default_none_arg:
-    description: argument with default None value type that can be overriden with a valid python type
-    type: float
-    default: None
-  override_default_arg_with_none:
-    description: argument with default python type that can be overriden with None
-    type: str
-
diff --git a/tests/examples/example_specs/components/arguments/input_manifest.json b/tests/examples/example_specs/components/arguments/input_manifest.json
deleted file mode 100644
index 9ee2494f9..000000000
--- a/tests/examples/example_specs/components/arguments/input_manifest.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "example_pipeline",
-    "base_path": "tests/example_data/subsets_input/mock_base_path",
-    "run_id": "example_pipeline_123",
-    "component_id": "component_1",
-    "cache_key": "00"
-  },
-  "index": {
-    "location": "/component_1"
-  },
-  "fields": {
-    "data": {
-      "type": "binary",
-      "location": "/component_1"
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/examples/example_specs/components/input_manifest.json b/tests/examples/example_specs/components/input_manifest.json
deleted file mode 100644
index 80fa0b91d..000000000
--- a/tests/examples/example_specs/components/input_manifest.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "test_pipeline",
-    "base_path": "/bucket",
-    "run_id": "test_pipeline_12345",
-    "component_id": "67890"
-  },
-  "index": {
-    "location": "/example_component"
-  },
-  "fields": {
-    "data": {
-      "location": "/example_component",
-      "type": "binary"
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt b/tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt
deleted file mode 100644
index 768ddfb21..000000000
--- a/tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt
+++ /dev/null
@@ -1 +0,0 @@
-tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
\ No newline at end of file
diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
deleted file mode 100644
index 47c2fe949..000000000
--- a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "example_pipeline",
-    "base_path": "tests/example_data/subsets_input/mock_base_path",
-    "run_id": "example_pipeline_2023",
-    "component_id": "component_1",
-    "cache_key": "42"
-  },
-  "index": {
-    "location": "/component_1"
-  },
-  "fields":
-  {
-    "data": {
-      "type": "binary",
-      "location": "/component_1"
-    },
-    "height": {
-      "type": "int32",
-      "location": "/component_1"
-    },
-    "width": {
-      "type": "int32",
-      "location": "/component_1"
-    },
-    "captions": {
-      "type": "string",
-      "location": "/component_1"
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json
deleted file mode 100644
index 78cfec59a..000000000
--- a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "example_pipeline",
-    "base_path": "tests/example_data/subsets_input/mock_base_path",
-    "run_id": "example_pipeline_2023",
-    "component_id": "component_2",
-    "cache_key": "42"
-  },
-  "index": {
-    "location": "/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        },
-        "height": {
-          "type": "int32"
-        },
-        "width": {
-          "type": "int32"
-        }
-      }
-    },
-    "captions": {
-      "location": "/captions",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json
deleted file mode 100644
index f00c64aac..000000000
--- a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "example_pipeline",
-    "base_path": "tests/example_data/subsets_input/mock_base_path",
-    "run_id": "example_pipeline_2024",
-    "component_id": "component_1",
-    "cache_key": "42"
-  },
-  "index": {
-    "location": "/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        },
-        "height": {
-          "type": "int32"
-        },
-        "width": {
-          "type": "int32"
-        }
-      }
-    },
-    "captions": {
-      "location": "/captions",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json
deleted file mode 100644
index f7a6f429d..000000000
--- a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "metadata": {
-    "pipeline_name": "example_pipeline",
-    "base_path": "tests/example_data/subsets_input/mock_base_path",
-    "run_id": "example_pipeline_2024",
-    "component_id": "component_2",
-    "cache_key": "42"
-  },
-  "index": {
-    "location": "/index"
-  },
-  "subsets": {
-    "images": {
-      "location": "/images",
-      "fields": {
-        "data": {
-          "type": "binary"
-        },
-        "height": {
-          "type": "int32"
-        },
-        "width": {
-          "type": "int32"
-        }
-      }
-    },
-    "captions": {
-      "location": "/captions",
-      "fields": {
-        "data": {
-          "type": "binary"
-        }
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/tests/sample_pipeline_test/components/dummy_component/Dockerfile b/tests/integration_tests/sample_pipeline_test/components/dummy_component/Dockerfile
similarity index 100%
rename from tests/sample_pipeline_test/components/dummy_component/Dockerfile
rename to tests/integration_tests/sample_pipeline_test/components/dummy_component/Dockerfile
diff --git a/tests/sample_pipeline_test/components/dummy_component/README.md b/tests/integration_tests/sample_pipeline_test/components/dummy_component/README.md
similarity index 100%
rename from tests/sample_pipeline_test/components/dummy_component/README.md
rename to tests/integration_tests/sample_pipeline_test/components/dummy_component/README.md
diff --git a/tests/sample_pipeline_test/components/dummy_component/fondant_component.yaml b/tests/integration_tests/sample_pipeline_test/components/dummy_component/fondant_component.yaml
similarity index 73%
rename from tests/sample_pipeline_test/components/dummy_component/fondant_component.yaml
rename to tests/integration_tests/sample_pipeline_test/components/dummy_component/fondant_component.yaml
index 1091703eb..0a041fa3d 100644
--- a/tests/sample_pipeline_test/components/dummy_component/fondant_component.yaml
+++ b/tests/integration_tests/sample_pipeline_test/components/dummy_component/fondant_component.yaml
@@ -4,13 +4,9 @@ description: Dummy component for testing custom components
 image: fndnt/dummy_component:dev
 
 consumes:
-  text:
-    fields:
-      data:
+  text_data:
         type: string
 
 produces:
-  text:
-    fields:
-      data:
+  text_data:
         type: string
\ No newline at end of file
diff --git a/tests/sample_pipeline_test/components/dummy_component/requirements.txt b/tests/integration_tests/sample_pipeline_test/components/dummy_component/requirements.txt
similarity index 100%
rename from tests/sample_pipeline_test/components/dummy_component/requirements.txt
rename to tests/integration_tests/sample_pipeline_test/components/dummy_component/requirements.txt
diff --git a/tests/sample_pipeline_test/components/dummy_component/src/main.py b/tests/integration_tests/sample_pipeline_test/components/dummy_component/src/main.py
similarity index 100%
rename from tests/sample_pipeline_test/components/dummy_component/src/main.py
rename to tests/integration_tests/sample_pipeline_test/components/dummy_component/src/main.py
diff --git a/tests/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml b/tests/integration_tests/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml
similarity index 95%
rename from tests/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml
rename to tests/integration_tests/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml
index 35c43aadb..eddb6e580 100644
--- a/tests/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml
+++ b/tests/integration_tests/sample_pipeline_test/components/load_from_parquet/fondant_component.yaml
@@ -3,9 +3,7 @@ description: Component that loads a dataset from a parquet uri
 image: fndnt/load_from_parquet:dev
 
 produces:
-  text:
-    fields:
-      data:
+  text_data:
         type: string
 
 args:
diff --git a/tests/sample_pipeline_test/data/sample.parquet b/tests/integration_tests/sample_pipeline_test/data/sample.parquet
similarity index 100%
rename from tests/sample_pipeline_test/data/sample.parquet
rename to tests/integration_tests/sample_pipeline_test/data/sample.parquet
diff --git a/tests/test_sample_pipeline.py b/tests/integration_tests/test_sample_pipeline.py
similarity index 91%
rename from tests/test_sample_pipeline.py
rename to tests/integration_tests/test_sample_pipeline.py
index fefc65531..8e7f6fbda 100644
--- a/tests/test_sample_pipeline.py
+++ b/tests/integration_tests/test_sample_pipeline.py
@@ -17,7 +17,7 @@
 # work around to make test executable on M1 Macbooks
 os.environ["DOCKER_DEFAULT_PLATFORM"] = "linux/amd64"
 
-BASE_PATH = Path("./tests/sample_pipeline_test")
+BASE_PATH = Path("./tests/integration_tests/sample_pipeline_test")
 NUMBER_OF_COMPONENTS = 3
 
 
@@ -57,6 +57,7 @@ def sample_pipeline(data_dir="./data") -> Pipeline:
     return pipeline
 
 
+@pytest.mark.skip(reason="Skipping due to random failure.")
 def test_local_runner(sample_pipeline, tmp_path_factory):
     with tmp_path_factory.mktemp("temp") as data_dir:
         sample_pipeline.base_path = str(data_dir)
@@ -64,7 +65,8 @@ def test_local_runner(sample_pipeline, tmp_path_factory):
             sample_pipeline,
             output_path="docker-compose.yaml",
             extra_volumes=[
-                str(Path("tests/sample_pipeline_test/data").resolve()) + ":/data",
+                str(Path("tests/integration_tests/sample_pipeline_test/data").resolve())
+                + ":/data",
             ],
         )
         DockerRunner().run("docker-compose.yaml")
diff --git a/tests/examples/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml b/tests/pipeline/examples/pipelines/compiled_pipeline/kubeflow_pipeline.yml
similarity index 100%
rename from tests/examples/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml
rename to tests/pipeline/examples/pipelines/compiled_pipeline/kubeflow_pipeline.yml
diff --git a/tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
similarity index 62%
rename from tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
rename to tests/pipeline/examples/pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
index abe5091ea..066519825 100644
--- a/tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
+++ b/tests/pipeline/examples/pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml
@@ -3,16 +3,12 @@ description: This is an example component
 image: example_component:latest
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  captions:
-    fields:
-      data:
-        type: string
+  captions_data:
+    type: string
 
 args:
   storage_args:
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
similarity index 55%
rename from tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
rename to tests/pipeline/examples/pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
index 2f9907df1..e9b67d68e 100644
--- a/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
+++ b/tests/pipeline/examples/pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml
@@ -3,18 +3,17 @@ description: This is an example component
 image: example_component:latest
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
+
+  caption_data:
+    type: string
 
 produces:
-  embeddings:
-    fields:
-      data:
-        type: array
-        items:
-          type: float32
+  embeddings_data:
+    type: array
+    items:
+      type: float32
 
 args:
   storage_args:
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
similarity index 61%
rename from tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
rename to tests/pipeline/examples/pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
index 18ea49b2c..053b4c5b5 100644
--- a/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
+++ b/tests/pipeline/examples/pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
@@ -2,17 +2,16 @@ name: First component
 description: This is an example component
 image: example_component:latest
 
-produces:
-  images:
-    fields:
-      data:
-        type: binary
+consumes:
+  images_data:
+    type: binary
 
-  captions:
-    fields:
-      data:
-        type: string
+produces:
+  captions_data:
+    type: string
 
+  images_data:
+    type: binary
 args:
   storage_args:
     description: Storage arguments
diff --git a/tests/pipeline/examples/pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
new file mode 100644
index 000000000..a1a7995a2
--- /dev/null
+++ b/tests/pipeline/examples/pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml
@@ -0,0 +1,24 @@
+name: Second component
+description: This is an example component
+image: example_component:latest
+
+consumes:
+  images_data:
+    type: binary
+
+  captions_data:
+    type: string
+
+  captions_description:
+    type: binary
+
+produces:
+  embeddings_data:
+    type: array
+    items:
+      type: float32
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
similarity index 53%
rename from tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
rename to tests/pipeline/examples/pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
index 45964a8c6..053b4c5b5 100644
--- a/tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
+++ b/tests/pipeline/examples/pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml
@@ -3,21 +3,15 @@ description: This is an example component
 image: example_component:latest
 
 consumes:
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 
 produces:
-  captions:
-    fields:
-      data:
-        type: string
+  captions_data:
+    type: string
 
-  images:
-    fields:
-      data:
-        type: binary
+  images_data:
+    type: binary
 args:
   storage_args:
     description: Storage arguments
diff --git a/tests/pipeline/examples/pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
new file mode 100644
index 000000000..8e0517f0a
--- /dev/null
+++ b/tests/pipeline/examples/pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml
@@ -0,0 +1,21 @@
+name: Second component
+description: This is an example component
+image: example_component:latest
+
+consumes:
+  images_data:
+    type: string
+
+  captions_data:
+    type: string
+
+produces:
+  embeddings_data:
+    type: array
+    items:
+      type: float32
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/first_component/Dockerfile
similarity index 100%
rename from tests/examples/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile
rename to tests/pipeline/examples/pipelines/valid_pipeline/example_1/first_component/Dockerfile
diff --git a/tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
similarity index 50%
rename from tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
rename to tests/pipeline/examples/pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
index 45964a8c6..0841688e9 100644
--- a/tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml
+++ b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml
@@ -2,22 +2,13 @@ name: First component
 description: This is an example component
 image: example_component:latest
 
-consumes:
-  images:
-    fields:
-      data:
-        type: binary
-
 produces:
-  captions:
-    fields:
-      data:
-        type: string
+  images_data:
+    type: binary
+
+  captions_data:
+    type: string
 
-  images:
-    fields:
-      data:
-        type: binary
 args:
   storage_args:
     description: Storage arguments
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/fourth_component/Dockerfile
similarity index 100%
rename from tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile
rename to tests/pipeline/examples/pipelines/valid_pipeline/example_1/fourth_component/Dockerfile
diff --git a/tests/pipeline/examples/pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
new file mode 100644
index 000000000..1cef340bd
--- /dev/null
+++ b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml
@@ -0,0 +1,29 @@
+name: Fourth component
+description: This is an example component
+image: example_component:latest
+
+consumes:
+  images_data:
+    type: binary
+
+  captions_data:
+    type: string
+
+  embeddings_data:
+    type: array
+    items:
+      type: float32
+
+produces:
+  images_data:
+    type: binary
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
+  some_list:
+    description: Some list
+    type: list
+    items:
+      type: int
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/second_component/Dockerfile
similarity index 100%
rename from tests/examples/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile
rename to tests/pipeline/examples/pipelines/valid_pipeline/example_1/second_component/Dockerfile
diff --git a/tests/pipeline/examples/pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
new file mode 100644
index 000000000..fa328ae01
--- /dev/null
+++ b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml
@@ -0,0 +1,18 @@
+name: Second component
+description: This is an example component
+image: example_component:latest
+
+consumes:
+  images_data:
+    type: binary
+
+produces:
+  embeddings_data:
+    type: array
+    items:
+      type: float32
+
+args:
+  storage_args:
+    description: Storage arguments
+    type: str
\ No newline at end of file
diff --git a/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/third_component/Dockerfile
similarity index 100%
rename from tests/examples/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile
rename to tests/pipeline/examples/pipelines/valid_pipeline/example_1/third_component/Dockerfile
diff --git a/tests/examples/example_specs/components/component.yaml b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
similarity index 59%
rename from tests/examples/example_specs/components/component.yaml
rename to tests/pipeline/examples/pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
index 973cc3e6b..fb6ebbaa0 100644
--- a/tests/examples/example_specs/components/component.yaml
+++ b/tests/pipeline/examples/pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml
@@ -1,4 +1,4 @@
-name: Example component
+name: Third component
 description: This is an example component
 image: example_component:latest
 
@@ -6,18 +6,19 @@ consumes:
   images_data:
     type: binary
 
-produces:
-  images_data:
+  captions_data:
+    type: string
+
+  embeddings_data:
     type: array
     items:
       type: float32
-additionalFields: false
 
+produces:
+  images_data:
+    type: binary
 
 args:
-  flag:
-    description: user argument
+  storage_args:
+    description: Storage arguments
     type: str
-  value:
-    description: integer value
-    type: int
diff --git a/tests/test_compiler.py b/tests/pipeline/test_compiler.py
similarity index 99%
rename from tests/test_compiler.py
rename to tests/pipeline/test_compiler.py
index 903c7963c..2c34f7f4e 100644
--- a/tests/test_compiler.py
+++ b/tests/pipeline/test_compiler.py
@@ -20,9 +20,9 @@
     VertexPipelineConfigs,
 )
 
-COMPONENTS_PATH = Path("./tests/example_pipelines/valid_pipeline")
+COMPONENTS_PATH = Path("./tests/pipeline/examples/pipelines/valid_pipeline")
 
-VALID_PIPELINE = Path("./tests/example_pipelines/compiled_pipeline/")
+VALID_PIPELINE = Path("./tests/pipeline/examples/pipelines/compiled_pipeline/")
 
 TEST_PIPELINES = [
     (
diff --git a/tests/test_pipeline.py b/tests/pipeline/test_pipeline.py
similarity index 98%
rename from tests/test_pipeline.py
rename to tests/pipeline/test_pipeline.py
index 37d421ef6..b4deebc97 100644
--- a/tests/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -8,8 +8,8 @@
 from fondant.core.exceptions import InvalidPipelineDefinition
 from fondant.pipeline import ComponentOp, Pipeline, Resources
 
-valid_pipeline_path = Path(__file__).parent / "example_pipelines/valid_pipeline"
-invalid_pipeline_path = Path(__file__).parent / "example_pipelines/invalid_pipeline"
+valid_pipeline_path = Path(__file__).parent / "examples/pipelines/valid_pipeline"
+invalid_pipeline_path = Path(__file__).parent / "examples/pipelines/invalid_pipeline"
 
 
 def yaml_file_to_dict(file_path):
diff --git a/tests/test_runner.py b/tests/pipeline/test_runner.py
similarity index 98%
rename from tests/test_runner.py
rename to tests/pipeline/test_runner.py
index 84ad63304..011f65e55 100644
--- a/tests/test_runner.py
+++ b/tests/pipeline/test_runner.py
@@ -11,7 +11,7 @@
     VertexRunner,
 )
 
-VALID_PIPELINE = Path("./tests/example_pipelines/compiled_pipeline/")
+VALID_PIPELINE = Path("./tests/pipeline/examples/pipelines/compiled_pipeline/")
 
 
 def test_docker_runner():
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 7897719aa..61fa8630f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -55,16 +55,16 @@ def test_basic_invocation(command):
 @pytest.mark.parametrize(
     "module_str",
     [
-        "example_modules.component",
-        "example_modules/component",
-        "example_modules.component.py",
-        "example_modules/component.py",
+        "examples.example_modules.component",
+        "examples.example_modules/component",
+        "examples.example_modules.component.py",
+        "examples.example_modules/component.py",
     ],
 )
 def test_get_module(module_str):
     """Test get module method."""
     module = get_module(module_str)
-    assert module.__name__ == "example_modules.component"
+    assert module.__name__ == "examples.example_modules.component"
 
 
 def test_get_module_error():
@@ -77,7 +77,7 @@ def test_get_module_error():
     "module_str",
     [
         __name__,  # cannot be split
-        "example_modules.component",  # module does not exist
+        "examples.example_modules.component",  # module does not exist
     ],
 )
 def test_component_from_module(module_str):
@@ -89,8 +89,10 @@ def test_component_from_module(module_str):
 @pytest.mark.parametrize(
     "module_str",
     [
-        "example_modules.invalid_component",  # module contains more than one component class
-        "example_modules.invalid_double_components",  # module does not contain a component class
+        # module does not contain a component class
+        "examples.example_modules.invalid_component",
+        # module contains more than one component class
+        "examples.example_modules.invalid_double_components",
     ],
 )
 def test_component_from_module_error(module_str):
@@ -103,7 +105,7 @@ def test_component_from_module_error(module_str):
     "module_str",
     [
         __name__,
-        "example_modules.pipeline",
+        "examples.example_modules.pipeline",
     ],
 )
 def test_pipeline_from_module(module_str):
@@ -115,8 +117,10 @@ def test_pipeline_from_module(module_str):
 @pytest.mark.parametrize(
     "module_str",
     [
-        "example_modules.component",  # module does not contain a pipeline instance
-        "example_modules.invalid_double_pipeline",  # module contains many pipeline instances
+        # module does not contain a pipeline instance
+        "examples.example_modules.component",
+        # module contains many pipeline instances
+        "examples.example_modules.invalid_double_pipeline",
     ],
 )
 def test_pipeline_from_module_error(module_str):
@@ -417,7 +421,7 @@ def test_vertex_run(tmp_path_factory):
 def test_component_build(mock_build, mock_push):
     """Test that the build command works as expected."""
     args = argparse.Namespace(
-        component_dir=Path(__file__).parent / "example_component",
+        component_dir=Path(__file__).parent / "examples/example_component",
         tag="image:test",
         build_arg=["key=value"],
         nocache=True,
@@ -435,7 +439,7 @@ def test_component_build(mock_build, mock_push):
 
     # Check that docker build and push were executed correctly
     mock_build.assert_called_with(
-        path=str(Path(__file__).parent / "example_component"),
+        path=str(Path(__file__).parent / "examples/example_component"),
         tag="image:test",
         buildargs={"key": "value"},
         nocache=True,
@@ -449,7 +453,7 @@ def test_component_build(mock_build, mock_push):
 
     # Check that the component specification file was updated correctly
     with open(
-        Path(__file__).parent / "example_component" / "fondant_component.yaml",
+        Path(__file__).parent / "examples/example_component" / "fondant_component.yaml",
         "r+",
     ) as f:
         content = f.read()
diff --git a/tox.ini b/tox.ini
index acd58f104..d22216b49 100644
--- a/tox.ini
+++ b/tox.ini
@@ -48,6 +48,6 @@ commands_pre=
     poetry install --all-extras
     poetry show
 commands=
-    poetry run python -m pytest tests -vv --cov fondant --cov-report term-missing
+    poetry run python -m pytest tests -vv --cov fondant --cov-report term-missing --ignore=tests/integration_tests
 commands_post=
     bash ./scripts/post-build.sh

From 521578fa891fc8cb6a677bc3b8b5406dbb1b03ec Mon Sep 17 00:00:00 2001
From: Robbe Sneyders <robbe.sneyders@gmail.com>
Date: Mon, 27 Nov 2023 10:21:30 +0100
Subject: [PATCH 4/4] Implement `previous_index` field (#668)

#656

We might want to validate this by checking that the field mentioned in
`previous_index` is also defined in the `consumes` section.
---
 .../download_images/fondant_component.yaml    |  1 -
 .../Dockerfile                                | 15 +++--
 .../embedding_based_laion_retrieval/README.md |  7 ++
 .../fondant_component.yaml                    |  5 +-
 .../src/main.py                               | 20 +++---
 .../test_requirements.txt                     |  1 +
 .../tests/pytest.ini                          |  2 +
 .../tests/test_component.py                   | 66 +++++++++++++++++++
 .../index_qdrant/fondant_component.yaml       | 14 ++--
 .../prompt_based_laion_retrieval/Dockerfile   | 15 +++--
 .../prompt_based_laion_retrieval/README.md    |  7 ++
 .../fondant_component.yaml                    |  5 +-
 .../prompt_based_laion_retrieval/src/main.py  | 17 +++--
 .../test_requirements.txt                     |  1 +
 .../tests/pytest.ini                          |  2 +
 .../tests/test_component.py                   | 66 +++++++++++++++++++
 src/fondant/component/executor.py             | 19 +-----
 src/fondant/core/component_spec.py            |  4 ++
 src/fondant/core/manifest.py                  |  5 +-
 src/fondant/core/schemas/component_spec.json  |  3 +
 .../examples/component_specs/component.yaml   |  2 -
 .../evolution_examples/2/component.yaml       |  6 +-
 .../evolution_examples/2/output_manifest.json | 16 -----
 23 files changed, 224 insertions(+), 75 deletions(-)
 create mode 100644 components/embedding_based_laion_retrieval/test_requirements.txt
 create mode 100644 components/embedding_based_laion_retrieval/tests/pytest.ini
 create mode 100644 components/embedding_based_laion_retrieval/tests/test_component.py
 create mode 100644 components/prompt_based_laion_retrieval/test_requirements.txt
 create mode 100644 components/prompt_based_laion_retrieval/tests/pytest.ini
 create mode 100644 components/prompt_based_laion_retrieval/tests/test_component.py

diff --git a/components/download_images/fondant_component.yaml b/components/download_images/fondant_component.yaml
index abe19c653..91efeca15 100644
--- a/components/download_images/fondant_component.yaml
+++ b/components/download_images/fondant_component.yaml
@@ -23,7 +23,6 @@ produces:
     type: int32
   images_height:
     type: int32
-#    additionalFields: false
 
 args:
   timeout:
diff --git a/components/embedding_based_laion_retrieval/Dockerfile b/components/embedding_based_laion_retrieval/Dockerfile
index 72525d884..0cdcde81a 100644
--- a/components/embedding_based_laion_retrieval/Dockerfile
+++ b/components/embedding_based_laion_retrieval/Dockerfile
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 python:3.8-slim
+FROM --platform=linux/amd64 python:3.8-slim as base
 
 # System dependencies
 RUN apt-get update && \
@@ -16,8 +16,15 @@ RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team
 
 # Set the working directory to the component folder
 WORKDIR /component/src
+COPY src/ src/
+ENV PYTHONPATH "${PYTHONPATH}:./src"
 
-# Copy over src-files
-COPY src/ .
+FROM base as test
+COPY test_requirements.txt .
+RUN pip3 install --no-cache-dir -r test_requirements.txt
+COPY tests/ tests/
+RUN python -m pytest tests
 
-ENTRYPOINT ["fondant", "execute", "main"]
\ No newline at end of file
+FROM base
+WORKDIR /component/src
+ENTRYPOINT ["fondant", "execute", "main"]
diff --git a/components/embedding_based_laion_retrieval/README.md b/components/embedding_based_laion_retrieval/README.md
index f19d55b03..97e0866a5 100644
--- a/components/embedding_based_laion_retrieval/README.md
+++ b/components/embedding_based_laion_retrieval/README.md
@@ -14,6 +14,7 @@ used to find images similar to the embedded images / captions.
 **This component produces:**
 
 - images_url: string
+- embedding_id: string
 
 ### Arguments
 
@@ -45,3 +46,9 @@ embedding_based_laion_retrieval_op = ComponentOp.from_registry(
 pipeline.add_op(embedding_based_laion_retrieval_op, dependencies=[...])  #Add previous component as dependency
 ```
 
+### Testing
+
+You can run the tests using docker with BuildKit. From this directory, run:
+```
+docker build . --target test
+```
diff --git a/components/embedding_based_laion_retrieval/fondant_component.yaml b/components/embedding_based_laion_retrieval/fondant_component.yaml
index af147c158..d7616cfbd 100644
--- a/components/embedding_based_laion_retrieval/fondant_component.yaml
+++ b/components/embedding_based_laion_retrieval/fondant_component.yaml
@@ -15,7 +15,10 @@ consumes:
 produces:
   images_url:
     type: string
-# additionalFields: false
+  embedding_id:
+    type: string
+
+previous_index: embedding_id
 
 args:
   num_images:
diff --git a/components/embedding_based_laion_retrieval/src/main.py b/components/embedding_based_laion_retrieval/src/main.py
index 0f7697dc3..4d730f24c 100644
--- a/components/embedding_based_laion_retrieval/src/main.py
+++ b/components/embedding_based_laion_retrieval/src/main.py
@@ -1,7 +1,6 @@
 """This component retrieves image URLs from LAION-5B based on a set of CLIP embeddings."""
 import asyncio
 import concurrent.futures
-import functools
 import logging
 import typing as t
 
@@ -40,6 +39,10 @@ def __init__(
             modality=Modality.IMAGE,
         )
 
+    def query(self, id_: t.Any, embedding: t.List[float]) -> t.List[t.Dict]:
+        results = self.client.query(embedding_input=embedding)
+        return [dict(d, embedding_id=id_) for d in results]
+
     def transform(
         self,
         dataframe: pd.DataFrame,
@@ -53,23 +56,20 @@ async def async_query():
                 futures = [
                     loop.run_in_executor(
                         executor,
-                        functools.partial(
-                            self.client.query,
-                            embedding_input=embedding.tolist(),
-                        ),
+                        self.query,
+                        row.id,
+                        row.embeddings_data.tolist(),
                     )
-                    for embedding in dataframe["embeddings_data"]
+                    for row in dataframe.itertuples()
                 ]
                 for response in await asyncio.gather(*futures):
                     results.extend(response)
 
         loop.run_until_complete(async_query())
 
-        results_df = pd.DataFrame(results)["id", "url"]
+        results_df = pd.DataFrame(results)[["id", "url", "embedding_id"]]
         results_df = results_df.set_index("id")
 
-        # Cast the index to string
-        results_df.index = results_df.index.astype(str)
-        results_df.columns = ["images_url"]
+        results_df = results_df.rename(columns={"url": "images_url"})
 
         return results_df
diff --git a/components/embedding_based_laion_retrieval/test_requirements.txt b/components/embedding_based_laion_retrieval/test_requirements.txt
new file mode 100644
index 000000000..2a929edcc
--- /dev/null
+++ b/components/embedding_based_laion_retrieval/test_requirements.txt
@@ -0,0 +1 @@
+pytest==7.4.2
diff --git a/components/embedding_based_laion_retrieval/tests/pytest.ini b/components/embedding_based_laion_retrieval/tests/pytest.ini
new file mode 100644
index 000000000..bf6a8a517
--- /dev/null
+++ b/components/embedding_based_laion_retrieval/tests/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+pythonpath = ../src
\ No newline at end of file
diff --git a/components/embedding_based_laion_retrieval/tests/test_component.py b/components/embedding_based_laion_retrieval/tests/test_component.py
new file mode 100644
index 000000000..ba59028bf
--- /dev/null
+++ b/components/embedding_based_laion_retrieval/tests/test_component.py
@@ -0,0 +1,66 @@
+import typing as t
+
+import numpy as np
+import pandas as pd
+
+from src.main import LAIONRetrievalComponent
+
+
+def test_component(monkeypatch):
+    def mocked_client_query(embedding_input: t.List[float]) -> t.List[dict]:
+        if embedding_input == [1, 2]:
+            return [
+                {
+                    "id": "a",
+                    "url": "http://a",
+                },
+                {
+                    "id": "b",
+                    "url": "http://b",
+                },
+            ]
+        if embedding_input == [2, 3]:
+            return [
+                {
+                    "id": "c",
+                    "url": "http://c",
+                },
+                {
+                    "id": "d",
+                    "url": "http://d",
+                },
+            ]
+        msg = f"Unexpected value: `embedding_input` was {embedding_input}"
+        raise ValueError(msg)
+
+    input_dataframe = pd.DataFrame.from_dict(
+        {
+            "id": ["1", "2"],
+            "embeddings_data": [np.array([1, 2]), np.array([2, 3])],
+        },
+    )
+
+    expected_output_dataframe = pd.DataFrame.from_dict(
+        {
+            "id": ["a", "b", "c", "d"],
+            "images_url": ["http://a", "http://b", "http://c", "http://d"],
+            "embedding_id": ["1", "1", "2", "2"],
+        },
+    )
+    expected_output_dataframe = expected_output_dataframe.set_index("id")
+
+    component = LAIONRetrievalComponent(
+        num_images=2,
+        aesthetic_score=9,
+        aesthetic_weight=0.5,
+    )
+
+    monkeypatch.setattr(component.client, "query", mocked_client_query)
+
+    output_dataframe = component.transform(input_dataframe)
+
+    pd.testing.assert_frame_equal(
+        left=expected_output_dataframe,
+        right=output_dataframe,
+        check_dtype=False,
+    )
diff --git a/components/index_qdrant/fondant_component.yaml b/components/index_qdrant/fondant_component.yaml
index 6feb3b257..68ea33847 100644
--- a/components/index_qdrant/fondant_component.yaml
+++ b/components/index_qdrant/fondant_component.yaml
@@ -7,14 +7,12 @@ image: 'fndnt/index_qdrant:dev'
 tags:
   - Data writing
 consumes:
-  text:
-    fields:
-      data:
-        type: string
-      embedding:
-        type: array
-        items:
-          type: float32
+  text_data:
+    type: string
+  embeddings_data:
+    type: array
+    items:
+      type: float32
 args:
   collection_name:
     description: The name of the Qdrant collection to upsert data into.
diff --git a/components/prompt_based_laion_retrieval/Dockerfile b/components/prompt_based_laion_retrieval/Dockerfile
index 72525d884..0cdcde81a 100644
--- a/components/prompt_based_laion_retrieval/Dockerfile
+++ b/components/prompt_based_laion_retrieval/Dockerfile
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 python:3.8-slim
+FROM --platform=linux/amd64 python:3.8-slim as base
 
 # System dependencies
 RUN apt-get update && \
@@ -16,8 +16,15 @@ RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team
 
 # Set the working directory to the component folder
 WORKDIR /component/src
+COPY src/ src/
+ENV PYTHONPATH "${PYTHONPATH}:./src"
 
-# Copy over src-files
-COPY src/ .
+FROM base as test
+COPY test_requirements.txt .
+RUN pip3 install --no-cache-dir -r test_requirements.txt
+COPY tests/ tests/
+RUN python -m pytest tests
 
-ENTRYPOINT ["fondant", "execute", "main"]
\ No newline at end of file
+FROM base
+WORKDIR /component/src
+ENTRYPOINT ["fondant", "execute", "main"]
diff --git a/components/prompt_based_laion_retrieval/README.md b/components/prompt_based_laion_retrieval/README.md
index 8d7ffcf70..0551730d9 100644
--- a/components/prompt_based_laion_retrieval/README.md
+++ b/components/prompt_based_laion_retrieval/README.md
@@ -17,6 +17,7 @@ This component doesn’t return the actual images, only URLs.
 **This component produces:**
 
 - images_url: string
+- prompt_id: string
 
 ### Arguments
 
@@ -50,3 +51,9 @@ prompt_based_laion_retrieval_op = ComponentOp.from_registry(
 pipeline.add_op(prompt_based_laion_retrieval_op, dependencies=[...])  #Add previous component as dependency
 ```
 
+### Testing
+
+You can run the tests using docker with BuildKit. From this directory, run:
+```
+docker build . --target test
+```
diff --git a/components/prompt_based_laion_retrieval/fondant_component.yaml b/components/prompt_based_laion_retrieval/fondant_component.yaml
index 02ea08349..3ac3604ac 100644
--- a/components/prompt_based_laion_retrieval/fondant_component.yaml
+++ b/components/prompt_based_laion_retrieval/fondant_component.yaml
@@ -16,7 +16,10 @@ consumes:
 produces:
   images_url:
     type: string
-#  additionalFields: false
+  prompt_id:
+    type: string
+
+previous_index: prompt_id
 
 args:
   num_images:
diff --git a/components/prompt_based_laion_retrieval/src/main.py b/components/prompt_based_laion_retrieval/src/main.py
index 2168f5ef0..bd3cee783 100644
--- a/components/prompt_based_laion_retrieval/src/main.py
+++ b/components/prompt_based_laion_retrieval/src/main.py
@@ -41,6 +41,10 @@ def __init__(
             modality=Modality.IMAGE,
         )
 
+    def query(self, id_: t.Any, prompt: str) -> t.List[t.Dict]:
+        results = self.client.query(text=prompt)
+        return [dict(d, prompt_id=id_) for d in results]
+
     def transform(
         self,
         dataframe: pd.DataFrame,
@@ -53,21 +57,20 @@ async def async_query():
                 futures = [
                     loop.run_in_executor(
                         executor,
-                        self.client.query,
-                        prompt,
+                        self.query,
+                        row.id,
+                        row.prompts_text,
                     )
-                    for prompt in dataframe["prompts_text"]
+                    for row in dataframe.itertuples()
                 ]
                 for response in await asyncio.gather(*futures):
                     results.extend(response)
 
         loop.run_until_complete(async_query())
 
-        results_df = pd.DataFrame(results)["id", "url"]
+        results_df = pd.DataFrame(results)[["id", "url", "prompt_id"]]
         results_df = results_df.set_index("id")
 
-        # Cast the index to string
-        results_df.index = results_df.index.astype(str)
-        results_df.columns = ["images_url"]
+        results_df = results_df.rename(columns={"url": "images_url"})
 
         return results_df
diff --git a/components/prompt_based_laion_retrieval/test_requirements.txt b/components/prompt_based_laion_retrieval/test_requirements.txt
new file mode 100644
index 000000000..2a929edcc
--- /dev/null
+++ b/components/prompt_based_laion_retrieval/test_requirements.txt
@@ -0,0 +1 @@
+pytest==7.4.2
diff --git a/components/prompt_based_laion_retrieval/tests/pytest.ini b/components/prompt_based_laion_retrieval/tests/pytest.ini
new file mode 100644
index 000000000..bf6a8a517
--- /dev/null
+++ b/components/prompt_based_laion_retrieval/tests/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+pythonpath = ../src
\ No newline at end of file
diff --git a/components/prompt_based_laion_retrieval/tests/test_component.py b/components/prompt_based_laion_retrieval/tests/test_component.py
new file mode 100644
index 000000000..7a3a268e6
--- /dev/null
+++ b/components/prompt_based_laion_retrieval/tests/test_component.py
@@ -0,0 +1,66 @@
+import typing as t
+
+import pandas as pd
+
+from src.main import LAIONRetrievalComponent
+
+
+def test_component(monkeypatch):
+    def mocked_client_query(text: str) -> t.List[dict]:
+        if text == "first prompt":
+            return [
+                {
+                    "id": "a",
+                    "url": "http://a",
+                },
+                {
+                    "id": "b",
+                    "url": "http://b",
+                },
+            ]
+        if text == "second prompt":
+            return [
+                {
+                    "id": "c",
+                    "url": "http://c",
+                },
+                {
+                    "id": "d",
+                    "url": "http://d",
+                },
+            ]
+        msg = f"Unexpected value: `text` was {text}"
+        raise ValueError(msg)
+
+    input_dataframe = pd.DataFrame.from_dict(
+        {
+            "id": ["1", "2"],
+            "prompts_text": ["first prompt", "second prompt"],
+        },
+    )
+
+    expected_output_dataframe = pd.DataFrame.from_dict(
+        {
+            "id": ["a", "b", "c", "d"],
+            "images_url": ["http://a", "http://b", "http://c", "http://d"],
+            "prompt_id": ["1", "1", "2", "2"],
+        },
+    )
+    expected_output_dataframe = expected_output_dataframe.set_index("id")
+
+    component = LAIONRetrievalComponent(
+        num_images=2,
+        aesthetic_score=9,
+        aesthetic_weight=0.5,
+        url="",
+    )
+
+    monkeypatch.setattr(component.client, "query", mocked_client_query)
+
+    output_dataframe = component.transform(input_dataframe)
+
+    pd.testing.assert_frame_equal(
+        left=expected_output_dataframe,
+        right=output_dataframe,
+        check_dtype=False,
+    )
diff --git a/src/fondant/component/executor.py b/src/fondant/component/executor.py
index d77200da8..571bc60bb 100644
--- a/src/fondant/component/executor.py
+++ b/src/fondant/component/executor.py
@@ -548,28 +548,11 @@ def _execute_component(
         )
 
         # Clear divisions if component spec indicates that the index is changed
-        if self._infer_index_change():
+        if self.spec.previous_index is not None:
             dataframe.clear_divisions()
 
         return dataframe
 
-    # TODO: fix in #244
-    def _infer_index_change(self) -> bool:
-        """Infer if this component changes the index based on its component spec."""
-        """
-        if not self.spec.accepts_additional_subsets:
-            return True
-        if not self.spec.outputs_additional_subsets:
-            return True
-        for subset in self.spec.consumes.values():
-            if not subset.additional_fields:
-                return True
-        return any(
-            not subset.additional_fields for subset in self.spec.produces.values()
-        )
-        """
-        return False
-
 
 class DaskWriteExecutor(Executor[DaskWriteComponent]):
     """Base class for a Fondant write component."""
diff --git a/src/fondant/core/component_spec.py b/src/fondant/core/component_spec.py
index 4dd945568..1700e10a1 100644
--- a/src/fondant/core/component_spec.py
+++ b/src/fondant/core/component_spec.py
@@ -181,6 +181,10 @@ def produces(self) -> t.Mapping[str, Field]:
             },
         )
 
+    @property
+    def previous_index(self) -> t.Optional[str]:
+        return self._specification.get("previous_index")
+
     @property
     def args(self) -> t.Mapping[str, Argument]:
         args = self.default_arguments
diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py
index 58c8ab045..4f0aab480 100644
--- a/src/fondant/core/manifest.py
+++ b/src/fondant/core/manifest.py
@@ -267,7 +267,10 @@ def evolve(  # : PLR0912 (too many branches)
             Field(name="index", location=component_spec.component_folder_name),
         )
 
-        # TODO handle additionalFields
+        # Remove all previous fields if the component changes the index
+        if component_spec.previous_index is not None:
+            for field_name in list(evolved_manifest.fields):
+                evolved_manifest.remove_field(field_name)
 
         # Add or update all produced fields defined in the component spec
         for name, field in component_spec.produces.items():
diff --git a/src/fondant/core/schemas/component_spec.json b/src/fondant/core/schemas/component_spec.json
index 064ea027d..dfa6bf68c 100644
--- a/src/fondant/core/schemas/component_spec.json
+++ b/src/fondant/core/schemas/component_spec.json
@@ -33,6 +33,9 @@
     "produces": {
       "$ref": "common.json#/definitions/fields"
     },
+    "previous_index": {
+      "type": "string"
+    },
     "args": {
       "$ref": "#/definitions/args"
     }
diff --git a/tests/component/examples/component_specs/component.yaml b/tests/component/examples/component_specs/component.yaml
index 973cc3e6b..d1f28b76e 100644
--- a/tests/component/examples/component_specs/component.yaml
+++ b/tests/component/examples/component_specs/component.yaml
@@ -11,8 +11,6 @@ produces:
     type: array
     items:
       type: float32
-additionalFields: false
-
 
 args:
   flag:
diff --git a/tests/core/examples/evolution_examples/2/component.yaml b/tests/core/examples/evolution_examples/2/component.yaml
index 2352adcb5..95d9300d1 100644
--- a/tests/core/examples/evolution_examples/2/component.yaml
+++ b/tests/core/examples/evolution_examples/2/component.yaml
@@ -7,8 +7,10 @@ consumes:
     type: binary
 
 produces:
-  images_encoding:
-    type: string
+  images_data:
+    type: binary
+
+previous_index: "true"  # Only used to remove old fields for now
 
 args:
   storage_args:
diff --git a/tests/core/examples/evolution_examples/2/output_manifest.json b/tests/core/examples/evolution_examples/2/output_manifest.json
index ca1f6f361..db62fda15 100644
--- a/tests/core/examples/evolution_examples/2/output_manifest.json
+++ b/tests/core/examples/evolution_examples/2/output_manifest.json
@@ -9,25 +9,9 @@
       "location":"/example_component"
    },
    "fields": {
-      "images_width": {
-         "type": "int32",
-         "location":"/example_component"
-      },
-      "images_height": {
-         "type": "int32",
-         "location":"/example_component"
-      },
       "images_data": {
          "type": "binary",
          "location":"/example_component"
-      },
-      "captions_data": {
-         "type": "binary",
-         "location":"/example_component"
-      },
-      "images_encoding": {
-         "type": "string",
-         "location":"/example_component"
       }
    }
 }
\ No newline at end of file