diff --git a/src/fondant/core/component_spec.py b/src/fondant/core/component_spec.py index cf177e07c..4dd945568 100644 --- a/src/fondant/core/component_spec.py +++ b/src/fondant/core/component_spec.py @@ -66,34 +66,6 @@ def kubeflow_type(self) -> str: return lookup[self.type] -class ComponentSubset: - """ - Class representing a Fondant Component subset. - - Args: - specification: the part of the component json representing the subset - """ - - def __init__(self, specification: t.Dict[str, t.Any]) -> None: - self._specification = specification - - def __repr__(self) -> str: - return f"{self.__class__.__name__}({self._specification!r})" - - @property - def fields(self) -> t.Mapping[str, Field]: - return types.MappingProxyType( - { - name: Field(name=name, type=Type.from_json(field)) - for name, field in self._specification["fields"].items() - }, - ) - - @property - def additional_fields(self) -> bool: - return self._specification.get("additionalFields", True) - - class ComponentSpec: """ Class representing a Fondant component specification. 
@@ -190,39 +162,25 @@ def tags(self) -> t.List[str]: return self._specification.get("tags", None) @property - def index(self): - return ComponentSubset({"fields": {}}) - - @property - def consumes(self) -> t.Mapping[str, ComponentSubset]: - """The subsets consumed by the component as an immutable mapping.""" + def consumes(self) -> t.Mapping[str, Field]: + """The fields consumed by the component as an immutable mapping.""" return types.MappingProxyType( { - name: ComponentSubset(subset) - for name, subset in self._specification.get("consumes", {}).items() - if name != "additionalSubsets" + name: Field(name=name, type=Type.from_json(field)) + for name, field in self._specification.get("consumes", {}).items() }, ) @property - def produces(self) -> t.Mapping[str, ComponentSubset]: - """The subsets produced by the component as an immutable mapping.""" + def produces(self) -> t.Mapping[str, Field]: + """The fields produced by the component as an immutable mapping.""" return types.MappingProxyType( { - name: ComponentSubset(subset) - for name, subset in self._specification.get("produces", {}).items() - if name != "additionalSubsets" + name: Field(name=name, type=Type.from_json(field)) + for name, field in self._specification.get("produces", {}).items() }, ) - @property - def accepts_additional_subsets(self) -> bool: - return self._specification.get("consumes", {}).get("additionalSubsets", True) - - @property - def outputs_additional_subsets(self) -> bool: - return self._specification.get("produces", {}).get("additionalSubsets", True) - @property def args(self) -> t.Mapping[str, Argument]: args = self.default_arguments diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py index 692c4e7cd..fc750620d 100644 --- a/src/fondant/core/manifest.py +++ b/src/fondant/core/manifest.py @@ -4,6 +4,7 @@ import pkgutil import types import typing as t +from collections import OrderedDict from dataclasses import asdict, dataclass from pathlib import Path @@ -18,59 +19,6 
@@ from fondant.core.schema import Field, Type -class Subset: - """ - Class representing a Fondant subset. - - Args: - specification: The part of the manifest json representing the subset - base_path: The base path which the subset location is defined relative to - """ - - def __init__(self, specification: dict, *, base_path: str) -> None: - self._specification = specification - self._base_path = base_path - - @property - def location(self) -> str: - """The absolute location of the subset.""" - return self._base_path + self._specification["location"] - - @property - def fields(self) -> t.Mapping[str, Field]: - """The fields of the subset returned as an immutable mapping.""" - return types.MappingProxyType( - { - name: Field(name=name, type=Type.from_json(field)) - for name, field in self._specification["fields"].items() - }, - ) - - def add_field(self, name: str, type_: Type, *, overwrite: bool = False) -> None: - if not overwrite and name in self._specification["fields"]: - msg = f"A field with name {name} already exists" - raise ValueError(msg) - - self._specification["fields"][name] = type_.to_json() - - def remove_field(self, name: str) -> None: - del self._specification["fields"][name] - - def __repr__(self) -> str: - return f"{self.__class__.__name__}({self._specification!r})" - - -class Index(Subset): - """Special case of a subset for the index, which has fixed fields.""" - - @property - def fields(self) -> t.Dict[str, Field]: - return { - "id": Field(name="id", type=Type("string")), - "source": Field(name="source", type=Type("string")), - } - - @dataclass class Metadata: """ @@ -171,8 +119,8 @@ def create( specification = { "metadata": metadata.to_dict(), - "index": {"location": f"/{pipeline_name}/{run_id}/{component_id}/index"}, - "subsets": {}, + "index": {"location": f"/{component_id}"}, + "fields": {}, } return cls(specification) @@ -196,6 +144,10 @@ def copy(self) -> "Manifest": def metadata(self) -> t.Dict[str, t.Any]: return 
self._specification["metadata"] + @property + def index(self) -> Field: + return Field(name="Index", location=self._specification["index"]["location"]) + def update_metadata(self, key: str, value: t.Any) -> None: self.metadata[key] = value @@ -203,6 +155,44 @@ def update_metadata(self, key: str, value: t.Any) -> None: def base_path(self) -> str: return self.metadata["base_path"] + @property + def field_mapping(self) -> t.Mapping[str, t.List[str]]: + """ + Retrieve a mapping of field locations to corresponding field names. + A dictionary where keys are field locations and values are lists + of column names. + + The method returns an immutable OrderedDict where the first dict element contains the + location of the dataframe with the index. This allows an efficient left join operation. + + Example: + { + "/base_path/component_1": ["Name", "HP"], + "/base_path/component_2": ["Type 1", "Type 2"], + } + """ + field_mapping = {} + for field_name, field in {"id": self.index, **self.fields}.items(): + location = ( + f"{self.base_path}/{self.pipeline_name}/{self.run_id}{field.location}" + ) + if location in field_mapping: + field_mapping[location].append(field_name) + else: + field_mapping[location] = [field_name] + + # Sort field mapping that the first dataset contains the index + sorted_keys = sorted( + field_mapping.keys(), + key=lambda key: "id" in field_mapping[key], + reverse=True, + ) + sorted_field_mapping = OrderedDict( + (key, field_mapping[key]) for key in sorted_keys + ) + + return types.MappingProxyType(sorted_field_mapping) + @property def run_id(self) -> str: return self.metadata["run_id"] @@ -220,39 +210,61 @@ def cache_key(self) -> str: return self.metadata["cache_key"] @property - def index(self) -> Index: - return Index(self._specification["index"], base_path=self.base_path) - - @property - def subsets(self) -> t.Mapping[str, Subset]: - """The subsets of the manifest as an immutable mapping.""" + def fields(self) -> t.Mapping[str, Field]: + """The fields 
of the manifest as an immutable mapping.""" return types.MappingProxyType( { - name: Subset(subset, base_path=self.base_path) - for name, subset in self._specification["subsets"].items() + name: Field( + name=name, + type=Type(field["type"]), + location=field["location"], + ) + for name, field in self._specification["fields"].items() }, ) - def add_subset( - self, - name: str, - fields: t.Iterable[t.Union[Field, t.Tuple[str, Type]]], - ) -> None: - if name in self._specification["subsets"]: - msg = f"A subset with name {name} already exists" + def add_or_update_field(self, field: Field, overwrite: bool = False): + """Add or update field to manifest.""" + if field.name == "index": + self._add_or_update_index(field, overwrite=True) + elif overwrite is False and field.name in self._specification["fields"]: + msg = ( + f"A field with name {field.name} already exists. Set overwrite to true, " + f"if you want to update the field." + ) + raise ValueError(msg) + else: + self._specification["fields"][field.name] = { + "location": f"/{self.component_id}", + **field.type.to_json(), + } + + def _add_or_update_index(self, field: Field, overwrite: bool = True): + """Add or update the manifest index.""" + if overwrite is False: + msg = ( + "The index already exists. Set overwrite to true, " + "if you want to update the index." + ) + raise ValueError(msg) + + if field.name != "index": + msg = ( + f"The field name is {field.name}. If you try to update the index, set the field" + f"name to `index`." 
+ ) raise ValueError(msg) - self._specification["subsets"][name] = { - "location": f"/{self.pipeline_name}/{self.run_id}/{self.component_id}/{name}", - "fields": {name: type_.to_json() for name, type_ in fields}, + self._specification["index"] = { + "location": f"/{field.location}", } - def remove_subset(self, name: str) -> None: - if name not in self._specification["subsets"]: - msg = f"Subset {name} not found in specification" + def remove_field(self, name: str) -> None: + if name not in self._specification["fields"]: + msg = f"Field {name} not found in specification" raise ValueError(msg) - del self._specification["subsets"][name] + del self._specification["fields"][name] def evolve( # noqa : PLR0912 (too many branches) self, @@ -274,68 +286,23 @@ def evolve( # noqa : PLR0912 (too many branches) # Update `component_id` of the metadata component_id = component_spec.component_folder_name evolved_manifest.update_metadata(key="component_id", value=component_id) + if run_id is not None: evolved_manifest.update_metadata(key="run_id", value=run_id) - # Update index location as this is currently always rewritten - evolved_manifest.index._specification[ - "location" - ] = f"/{self.pipeline_name}/{evolved_manifest.run_id}/{component_id}/index" - - # If additionalSubsets is False in consumes, - # Remove all subsets from the manifest that are not listed - if not component_spec.accepts_additional_subsets: - for subset_name in evolved_manifest.subsets: - if subset_name not in component_spec.consumes: - evolved_manifest.remove_subset(subset_name) - - # If additionalSubsets is False in produces, - # Remove all subsets from the manifest that are not listed - if not component_spec.outputs_additional_subsets: - for subset_name in evolved_manifest.subsets: - if subset_name not in component_spec.produces: - evolved_manifest.remove_subset(subset_name) - - # If additionalFields is False for a consumed subset, - # Remove all fields from that subset that are not listed - for 
subset_name, subset in component_spec.consumes.items(): - if subset_name in evolved_manifest.subsets and not subset.additional_fields: - for field_name in evolved_manifest.subsets[subset_name].fields: - if field_name not in subset.fields: - evolved_manifest.subsets[subset_name].remove_field( - field_name, - ) - - # For each output subset defined in the component, add or update it - for subset_name, subset in component_spec.produces.items(): - # Subset is already in manifest, update it - if subset_name in evolved_manifest.subsets: - # If additional fields are not allowed, remove the fields not defined in the - # component spec produces section - if not subset.additional_fields: - for field_name in evolved_manifest.subsets[subset_name].fields: - if field_name not in subset.fields: - evolved_manifest.subsets[subset_name].remove_field( - field_name, - ) - - # Add fields defined in the component spec produces section - # Overwrite to persist changes to the field (eg. type of column) - for field in subset.fields.values(): - evolved_manifest.subsets[subset_name].add_field( - field.name, - field.type, - overwrite=True, - ) - - # Update subset location as this is currently always rewritten - evolved_manifest.subsets[subset_name]._specification[ - "location" - ] = f"/{self.pipeline_name}/{evolved_manifest.run_id}/{component_id}/{subset_name}" - - # Subset is not yet in manifest, add it - else: - evolved_manifest.add_subset(subset_name, subset.fields.values()) + # Update index location as this is always rewritten + evolved_manifest.add_or_update_field( + Field(name="index", location=component_spec.component_folder_name), + ) + + # TODO handle additionalFields + + # Add or update all produced fields defined in the component spec + for name, field in component_spec.produces.items(): + # If field was not part of the input manifest, add field to output manifest. + # If field was part of the input manifest and got produced by the component, update + # the manifest field. 
+ evolved_manifest.add_or_update_field(field, overwrite=True) return evolved_manifest diff --git a/src/fondant/core/schema.py b/src/fondant/core/schema.py index ca9bb0944..dc940b5f7 100644 --- a/src/fondant/core/schema.py +++ b/src/fondant/core/schema.py @@ -5,6 +5,7 @@ import os import re import typing as t +from dataclasses import dataclass from enum import Enum import pyarrow as pa @@ -161,11 +162,33 @@ def __eq__(self, other): return False -class Field(t.NamedTuple): - """Class representing a single field or column in a Fondant subset.""" +class Field: + """Class representing a single field or column in a Fondant dataset.""" - name: str - type: Type + def __init__( + self, + name: str, + type: Type = None, + location: str = "", + ) -> None: + self._name = name + self._type = type + self._location = location + + @property + def name(self) -> str: + """The name of the field.""" + return self._name + + @property + def type(self) -> Type: + """The absolute location of the field.""" + return self._type + + @property + def location(self) -> str: + """The relative location of the field.""" + return self._location def validate_partition_size(arg_value): diff --git a/src/fondant/core/schemas/component_spec.json b/src/fondant/core/schemas/component_spec.json index 8d684a3e5..064ea027d 100644 --- a/src/fondant/core/schemas/component_spec.json +++ b/src/fondant/core/schemas/component_spec.json @@ -28,44 +28,16 @@ } }, "consumes": { - "$ref": "#/definitions/subsets" + "$ref": "common.json#/definitions/fields" }, "produces": { - "$ref": "#/definitions/subsets" + "$ref": "common.json#/definitions/fields" }, "args": { "$ref": "#/definitions/args" } }, "definitions": { - "subset": { - "type": "object", - "properties": { - "fields": { - "$ref": "common.json#/definitions/fields" - }, - "additionalFields": { - "type": "boolean", - "default": true - } - }, - "required": [ - "fields" - ] - }, - "subsets": { - "type": "object", - "properties": { - "additionalSubsets": { - "type": 
"boolean", - "default": true - } - }, - "minProperties": 1, - "additionalProperties": { - "$ref": "#/definitions/subset" - } - }, "args": { "type": "object", "minProperties": 1, diff --git a/src/fondant/core/schemas/manifest.json b/src/fondant/core/schemas/manifest.json index 00ad6d1cc..77365dd5f 100644 --- a/src/fondant/core/schemas/manifest.json +++ b/src/fondant/core/schemas/manifest.json @@ -37,36 +37,33 @@ "location" ] }, - "subsets": { - "$ref": "#/definitions/subsets" + "fields": { + "$ref": "#/definitions/fields" } }, "required": [ "metadata", "index", - "subsets" + "fields" ], "definitions": { - "subset": { + "field": { "type": "object", "properties": { "location": { "type": "string", "pattern": "/.*" - }, - "fields": { - "$ref": "common.json#/definitions/fields" } }, "required": [ "location", - "fields" + "type" ] }, - "subsets": { + "fields": { "type": "object", "additionalProperties": { - "$ref": "#/definitions/subset" + "$ref": "#/definitions/field" } } } diff --git a/tests/example_specs/evolution_examples/4/component.yaml b/tests/core/examples/component_specs/invalid_component.yaml similarity index 84% rename from tests/example_specs/evolution_examples/4/component.yaml rename to tests/core/examples/component_specs/invalid_component.yaml index 067b06da0..d1c88c444 100644 --- a/tests/example_specs/evolution_examples/4/component.yaml +++ b/tests/core/examples/component_specs/invalid_component.yaml @@ -7,14 +7,14 @@ consumes: fields: data: type: binary - + produces: - images: + captions: fields: - encoding: + data: type: string -args: +Arguments: storage_args: description: Storage arguments - type: str + type: str \ No newline at end of file diff --git a/tests/example_specs/component_specs/kubeflow_component.yaml b/tests/core/examples/component_specs/kubeflow_component.yaml similarity index 100% rename from tests/example_specs/component_specs/kubeflow_component.yaml rename to tests/core/examples/component_specs/kubeflow_component.yaml diff --git 
a/tests/example_specs/evolution_examples/1/component.yaml b/tests/core/examples/component_specs/valid_component.yaml similarity index 62% rename from tests/example_specs/evolution_examples/1/component.yaml rename to tests/core/examples/component_specs/valid_component.yaml index 22ae0feb1..1215af1bd 100644 --- a/tests/example_specs/evolution_examples/1/component.yaml +++ b/tests/core/examples/component_specs/valid_component.yaml @@ -1,20 +1,21 @@ name: Example component description: This is an example component image: example_component:latest +tags: + - Data loading consumes: images: - fields: - data: - type: binary - -produces: + type: binary + embeddings: - fields: - data: - type: array - items: - type: float32 + type: array + items: + type: float32 + +produces: + captions: + type: string args: storage_args: diff --git a/tests/example_specs/component_specs/valid_component_no_args.yaml b/tests/core/examples/component_specs/valid_component_no_args.yaml similarity index 59% rename from tests/example_specs/component_specs/valid_component_no_args.yaml rename to tests/core/examples/component_specs/valid_component_no_args.yaml index c3adfa6aa..de11cb2ee 100644 --- a/tests/example_specs/component_specs/valid_component_no_args.yaml +++ b/tests/core/examples/component_specs/valid_component_no_args.yaml @@ -4,12 +4,13 @@ image: example_component:latest consumes: images: - fields: - data: - type: binary + type: binary + + embeddings: + type: array + items: + type: float32 produces: captions: - fields: - data: - type: string \ No newline at end of file + type: string diff --git a/tests/example_specs/component_specs/invalid_component.yaml b/tests/core/examples/evolution_examples/1/component.yaml similarity index 59% rename from tests/example_specs/component_specs/invalid_component.yaml rename to tests/core/examples/evolution_examples/1/component.yaml index 3fc8128b5..e91ae6f46 100644 --- a/tests/example_specs/component_specs/invalid_component.yaml +++ 
b/tests/core/examples/evolution_examples/1/component.yaml @@ -3,14 +3,16 @@ description: This is an example component image: example_component:latest consumes: - images: - data: binary + images_data: + type: binary produces: - captions: - data: string + embeddings_data: + type: array + items: + type: float32 -Arguments: +args: storage_args: description: Storage arguments - type: str \ No newline at end of file + type: str diff --git a/tests/core/examples/evolution_examples/1/output_manifest.json b/tests/core/examples/evolution_examples/1/output_manifest.json new file mode 100644 index 000000000..2a73e5f29 --- /dev/null +++ b/tests/core/examples/evolution_examples/1/output_manifest.json @@ -0,0 +1,36 @@ +{ + "metadata":{ + "pipeline_name":"test_pipeline", + "base_path":"gs://bucket", + "run_id":"custom_run_id", + "component_id":"example_component" + }, + "index":{ + "location":"/example_component" + }, + "fields": { + "images_width": { + "type": "int32", + "location":"/example_component" + }, + "images_height": { + "type": "int32", + "location":"/example_component" + }, + "images_data": { + "type": "binary", + "location":"/example_component" + }, + "captions_data": { + "type": "binary", + "location":"/example_component" + }, + "embeddings_data": { + "type": "array", + "items": { + "type": "float32" + }, + "location":"/example_component" + } + } +} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/8/component.yaml b/tests/core/examples/evolution_examples/2/component.yaml similarity index 69% rename from tests/example_specs/evolution_examples/8/component.yaml rename to tests/core/examples/evolution_examples/2/component.yaml index 5c204b9c2..2352adcb5 100644 --- a/tests/example_specs/evolution_examples/8/component.yaml +++ b/tests/core/examples/evolution_examples/2/component.yaml @@ -3,10 +3,12 @@ description: This is an example component image: example_component:latest consumes: - images: - fields: - data: - type: binary + images_data: + 
type: binary + +produces: + images_encoding: + type: string args: storage_args: diff --git a/tests/core/examples/evolution_examples/2/output_manifest.json b/tests/core/examples/evolution_examples/2/output_manifest.json new file mode 100644 index 000000000..ca1f6f361 --- /dev/null +++ b/tests/core/examples/evolution_examples/2/output_manifest.json @@ -0,0 +1,33 @@ +{ + "metadata":{ + "pipeline_name":"test_pipeline", + "base_path":"gs://bucket", + "run_id":"custom_run_id", + "component_id":"example_component" + }, + "index":{ + "location":"/example_component" + }, + "fields": { + "images_width": { + "type": "int32", + "location":"/example_component" + }, + "images_height": { + "type": "int32", + "location":"/example_component" + }, + "images_data": { + "type": "binary", + "location":"/example_component" + }, + "captions_data": { + "type": "binary", + "location":"/example_component" + }, + "images_encoding": { + "type": "string", + "location":"/example_component" + } + } +} \ No newline at end of file diff --git a/tests/core/examples/evolution_examples/3/component.yaml b/tests/core/examples/evolution_examples/3/component.yaml new file mode 100644 index 000000000..13b1427b3 --- /dev/null +++ b/tests/core/examples/evolution_examples/3/component.yaml @@ -0,0 +1,16 @@ +name: Example component 1 +description: This is an example component +image: example_component_1:latest + +consumes: + images_data: + type: binary + +produces: + images_data: + type: string + +args: + storage_args: + description: Storage arguments + type: str diff --git a/tests/core/examples/evolution_examples/3/output_manifest.json b/tests/core/examples/evolution_examples/3/output_manifest.json new file mode 100644 index 000000000..b11f7d8a3 --- /dev/null +++ b/tests/core/examples/evolution_examples/3/output_manifest.json @@ -0,0 +1,29 @@ +{ + "metadata":{ + "pipeline_name":"test_pipeline", + "base_path":"gs://bucket", + "run_id":"custom_run_id", + "component_id":"example_component_1" + }, + "index":{ + 
"location":"/example_component_1" + }, + "fields": { + "images_width": { + "type": "int32", + "location":"/example_component" + }, + "images_height": { + "type": "int32", + "location":"/example_component" + }, + "images_data": { + "type": "string", + "location":"/example_component_1" + }, + "captions_data": { + "type": "binary", + "location":"/example_component" + } + } +} \ No newline at end of file diff --git a/tests/core/examples/evolution_examples/4/component.yaml b/tests/core/examples/evolution_examples/4/component.yaml new file mode 100644 index 000000000..1b766036d --- /dev/null +++ b/tests/core/examples/evolution_examples/4/component.yaml @@ -0,0 +1,12 @@ +name: Example component 1 +description: This is an example component +image: example_component_1:latest + +consumes: + images_data: + type: binary + +args: + storage_args: + description: Storage arguments + type: str diff --git a/tests/core/examples/evolution_examples/4/output_manifest.json b/tests/core/examples/evolution_examples/4/output_manifest.json new file mode 100644 index 000000000..929c380ab --- /dev/null +++ b/tests/core/examples/evolution_examples/4/output_manifest.json @@ -0,0 +1,29 @@ +{ + "metadata":{ + "pipeline_name":"test_pipeline", + "base_path":"gs://bucket", + "run_id":"custom_run_id", + "component_id":"example_component_1" + }, + "index":{ + "location":"/example_component_1" + }, + "fields": { + "images_width": { + "type": "int32", + "location":"/example_component" + }, + "images_height": { + "type": "int32", + "location":"/example_component" + }, + "images_data": { + "type": "binary", + "location":"/example_component" + }, + "captions_data": { + "type": "binary", + "location":"/example_component" + } + } +} \ No newline at end of file diff --git a/tests/core/examples/evolution_examples/input_manifest.json b/tests/core/examples/evolution_examples/input_manifest.json new file mode 100644 index 000000000..664367cc2 --- /dev/null +++ 
b/tests/core/examples/evolution_examples/input_manifest.json @@ -0,0 +1,29 @@ +{ + "metadata":{ + "pipeline_name":"test_pipeline", + "base_path":"gs://bucket", + "run_id":"12345", + "component_id":"example_component" + }, + "index":{ + "location":"/example_component" + }, + "fields": { + "images_width": { + "type": "int32", + "location":"/example_component" + }, + "images_height": { + "type": "int32", + "location":"/example_component" + }, + "images_data": { + "type": "binary", + "location":"/example_component" + }, + "captions_data": { + "type": "binary", + "location":"/example_component" + } + } +} \ No newline at end of file diff --git a/tests/core/examples/manifests/invalid_manifest.json b/tests/core/examples/manifests/invalid_manifest.json new file mode 100644 index 000000000..51ec6c5e5 --- /dev/null +++ b/tests/core/examples/manifests/invalid_manifest.json @@ -0,0 +1,14 @@ +{ + "metadata": { + "pipeline_name": "test_pipeline", + "base_path": "gs://bucket", + "run_id": "test_pipeline_12345", + "component_id": "67890" + }, + "index": { + "location": "/component1" + }, + "fields": { + "images": {} + } +} \ No newline at end of file diff --git a/tests/core/examples/manifests/valid_manifest.json b/tests/core/examples/manifests/valid_manifest.json new file mode 100644 index 000000000..0f7c58126 --- /dev/null +++ b/tests/core/examples/manifests/valid_manifest.json @@ -0,0 +1,29 @@ +{ + "metadata": { + "pipeline_name": "test_pipeline", + "base_path": "gs://bucket", + "run_id": "test_pipeline_12345", + "component_id": "67890" + }, + "index": { + "location": "/component1" + }, + "fields":{ + "images": { + "location": "/component1", + "type": "binary" + }, + "height": { + "location": "/component2", + "type": "int32" + }, + "width": { + "location": "/component2", + "type": "int32" + }, + "caption": { + "location": "/component3", + "type": "string" + } + } +} \ No newline at end of file diff --git a/tests/test_component_specs.py b/tests/core/test_component_specs.py 
similarity index 85% rename from tests/test_component_specs.py rename to tests/core/test_component_specs.py index caf0344de..dcbf4c2ed 100644 --- a/tests/test_component_specs.py +++ b/tests/core/test_component_specs.py @@ -8,13 +8,12 @@ import yaml from fondant.core.component_spec import ( ComponentSpec, - ComponentSubset, KubeflowComponentSpec, ) from fondant.core.exceptions import InvalidComponentSpec from fondant.core.schema import Type -component_specs_path = Path(__file__).parent / "example_specs/component_specs" +component_specs_path = Path(__file__).parent / "examples/component_specs" @pytest.fixture() @@ -49,12 +48,19 @@ def test_component_spec_pkgutil_error(mock_get_data): def test_component_spec_validation(valid_fondant_schema, invalid_fondant_schema): - """Test that the manifest is validated correctly on instantiation.""" + """Test that the component spec is validated correctly on instantiation.""" ComponentSpec(valid_fondant_schema) with pytest.raises(InvalidComponentSpec): ComponentSpec(invalid_fondant_schema) +def test_component_spec_load_from_file(valid_fondant_schema, invalid_fondant_schema): + """Test that the component spec is validated correctly on instantiation.""" + ComponentSpec.from_file(component_specs_path / "valid_component.yaml") + with pytest.raises(InvalidComponentSpec): + ComponentSpec.from_file(component_specs_path / "invalid_component.yaml") + + def test_attribute_access(valid_fondant_schema): """ Test that attributes can be accessed as expected: @@ -65,8 +71,8 @@ def test_attribute_access(valid_fondant_schema): assert fondant_component.name == "Example component" assert fondant_component.description == "This is an example component" - assert fondant_component.consumes["images"].fields["data"].type == Type("binary") - assert fondant_component.consumes["embeddings"].fields["data"].type == Type.list( + assert fondant_component.consumes["images"].type == Type("binary") + assert fondant_component.consumes["embeddings"].type == Type.list( 
Type("float32"), ) @@ -129,15 +135,3 @@ def test_kubeflow_component_spec_repr(valid_kubeflow_schema): kubeflow_component_spec = KubeflowComponentSpec(valid_kubeflow_schema) expected_repr = f"KubeflowComponentSpec({valid_kubeflow_schema!r})" assert repr(kubeflow_component_spec) == expected_repr - - -def test_component_subset_repr(): - """Test that the __repr__ method of ComponentSubset returns the expected string.""" - component_subset_schema = { - "name": "Example subset", - "description": "This is an example subset", - } - - component_subset = ComponentSubset(component_subset_schema) - expected_repr = f"ComponentSubset({component_subset_schema!r})" - assert repr(component_subset) == expected_repr diff --git a/tests/core/test_manifest.py b/tests/core/test_manifest.py new file mode 100644 index 000000000..0b255b9df --- /dev/null +++ b/tests/core/test_manifest.py @@ -0,0 +1,246 @@ +import json +import pkgutil +from collections import OrderedDict +from pathlib import Path + +import pytest +from fondant.core.component_spec import ComponentSpec +from fondant.core.exceptions import InvalidManifest +from fondant.core.manifest import Field, Manifest, Type + +manifest_path = Path(__file__).parent / "examples" / "manifests" +component_specs_path = Path(__file__).parent / "examples" / "component_specs" + + +@pytest.fixture() +def valid_manifest(): + with open(manifest_path / "valid_manifest.json") as f: + return json.load(f) + + +@pytest.fixture() +def invalid_manifest(): + with open(manifest_path / "invalid_manifest.json") as f: + return json.load(f) + + +def test_manifest_validation(valid_manifest, invalid_manifest): + """Test that the manifest is validated correctly on instantiation.""" + Manifest(valid_manifest) + with pytest.raises(InvalidManifest): + Manifest(invalid_manifest) + + +def test_set_base_path(valid_manifest): + """Test altering the base path in the manifest.""" + manifest = Manifest(valid_manifest) + tmp_path = "/tmp/base_path" + 
manifest.update_metadata(key="base_path", value=tmp_path) + + assert manifest.base_path == tmp_path + assert manifest._specification["metadata"]["base_path"] == tmp_path + + +def test_from_to_file(valid_manifest): + """Test reading from and writing to file.""" + tmp_path = "/tmp/manifest.json" + with open(tmp_path, "w", encoding="utf-8") as f: + json.dump(valid_manifest, f) + + manifest = Manifest.from_file(tmp_path) + assert manifest.metadata == valid_manifest["metadata"] + + manifest.to_file(tmp_path) + with open(tmp_path, encoding="utf-8") as f: + assert json.load(f) == valid_manifest + + +def test_attribute_access(valid_manifest): + """ + Test that attributes can be accessed as expected: + - Fixed properties should be accessible as an attribute + - Dynamic properties should be accessible by lookup. + """ + manifest = Manifest(valid_manifest) + + assert manifest.metadata == valid_manifest["metadata"] + assert manifest.index.location == "/component1" + assert manifest.fields["images"].location == "/component1" + assert manifest.fields["images"].type == Type("binary") + + +def test_manifest_creation(): + """Test the stepwise creation of a manifest via the Manifest class.""" + base_path = "gs://bucket" + run_id = "run_id" + pipeline_name = "pipeline_name" + component_id = "component_id" + cache_key = "42" + + manifest = Manifest.create( + pipeline_name=pipeline_name, + base_path=base_path, + run_id=run_id, + component_id=component_id, + cache_key=cache_key, + ) + + manifest.add_or_update_field(Field(name="width", type=Type("int32"))) + manifest.add_or_update_field(Field(name="height", type=Type("int32"))) + manifest.add_or_update_field(Field(name="data", type=Type("binary"))) + + assert manifest._specification == { + "metadata": { + "pipeline_name": pipeline_name, + "base_path": base_path, + "run_id": run_id, + "component_id": component_id, + "cache_key": cache_key, + }, + "index": {"location": f"/{component_id}"}, + "fields": { + "width": { + "type": "int32", + 
"location": f"/{component_id}", + }, + "height": { + "type": "int32", + "location": f"/{component_id}", + }, + "data": { + "type": "binary", + "location": f"/{component_id}", + }, + }, + } + + +def test_manifest_repr(): + manifest = Manifest.create( + pipeline_name="NAME", + base_path="/", + run_id="A", + component_id="1", + cache_key="42", + ) + assert ( + manifest.__repr__() + == "Manifest({'metadata': {'base_path': '/', 'pipeline_name': 'NAME', 'run_id': 'A'," + " 'component_id': '1', 'cache_key': '42'}," + " 'index': {'location': '/1'}, 'fields': {}})" + ) + + +def test_manifest_alteration(valid_manifest): + """Test alteration functionalities of a manifest via the Manifest class.""" + manifest = Manifest(valid_manifest) + + # test adding new fields + manifest.add_or_update_field(Field(name="width2", type=Type("int32"))) + manifest.add_or_update_field(Field(name="height2", type=Type("int32"))) + + assert "width2" in manifest.fields + assert "height2" in manifest.fields + + # test that re-adding an existing field without overwrite is rejected + with pytest.raises(ValueError, match="A field with name width2 already exists"): + manifest.add_or_update_field(Field(name="width2", type=Type("int32"))) + + # test removing a field: assert on the field that was actually removed + manifest.remove_field("width2") + assert "width2" not in manifest.fields + + # test removing a nonexistent field + with pytest.raises(ValueError, match="Field pictures not found in specification"): + manifest.remove_field("pictures") + + +def test_manifest_copy_and_adapt(valid_manifest): + """Test that a manifest can be copied and adapted without changing the original.""" + manifest = Manifest(valid_manifest) + new_manifest = manifest.copy() + new_manifest.remove_field("images") + assert manifest._specification == valid_manifest + assert new_manifest._specification != valid_manifest + + +def test_no_validate_schema(monkeypatch, valid_manifest): + monkeypatch.setattr(pkgutil, "get_data", lambda package, resource: None) + with pytest.raises(FileNotFoundError): + 
Manifest(valid_manifest) + + +def test_evolve_manifest(): + """Test that the fields are evolved as expected.""" + run_id = "A" + spec = ComponentSpec.from_file(component_specs_path / "valid_component.yaml") + input_manifest = Manifest.create( + pipeline_name="NAME", + base_path="/base_path", + run_id=run_id, + component_id="component_1", + cache_key="42", + ) + + output_manifest = input_manifest.evolve(component_spec=spec, run_id=run_id) + + assert output_manifest.base_path == input_manifest.base_path + assert output_manifest.run_id == run_id + assert output_manifest.index.location == "/" + spec.component_folder_name + assert output_manifest.fields["captions"].type.name == "string" + + +def test_fields(): + """Test that the fields can added and updated as expected.""" + run_id = "A" + manifest = Manifest.create( + pipeline_name="NAME", + base_path="/base_path", + run_id=run_id, + component_id="component_1", + cache_key="42", + ) + + # add a field + manifest.add_or_update_field(Field(name="field_1", type=Type("int32"))) + assert "field_1" in manifest.fields + + # add a duplicate field, but overwrite (update) + manifest.add_or_update_field( + Field(name="field_1", type=Type("string")), + overwrite=True, + ) + assert manifest.fields["field_1"].type.name == "string" + + # add duplicate field + with pytest.raises( + ValueError, + match="A field with name field_1 already exists. 
Set overwrite to true, " + "if you want to update the field.", + ): + manifest.add_or_update_field( + Field(name="field_1", type=Type("string")), + overwrite=False, + ) + + # delete a field + manifest.remove_field(name="field_1") + assert "field_1" not in manifest.fields + + +def test_field_mapping(valid_manifest): + """Test field mapping generation.""" + manifest = Manifest(valid_manifest) + manifest.add_or_update_field(Field(name="index", location="component2")) + field_mapping = manifest.field_mapping + assert field_mapping == OrderedDict( + { + "gs://bucket/test_pipeline/test_pipeline_12345/component2": [ + "id", + "height", + "width", + ], + "gs://bucket/test_pipeline/test_pipeline_12345/component1": ["images"], + "gs://bucket/test_pipeline/test_pipeline_12345/component3": ["caption"], + }, + ) diff --git a/tests/test_manifest_evolution.py b/tests/core/test_manifest_evolution.py similarity index 83% rename from tests/test_manifest_evolution.py rename to tests/core/test_manifest_evolution.py index c79b76aaf..0d9181701 100644 --- a/tests/test_manifest_evolution.py +++ b/tests/core/test_manifest_evolution.py @@ -6,7 +6,7 @@ from fondant.core.component_spec import ComponentSpec from fondant.core.manifest import Manifest -examples_path = Path(__file__).parent / "example_specs/evolution_examples" +examples_path = Path(__file__).parent / "examples/evolution_examples" @pytest.fixture() @@ -41,7 +41,7 @@ def test_component_spec_location_update(): with open(examples_path / "input_manifest.json") as f: input_manifest = json.load(f) - with open(examples_path / "7/component.yaml") as f: + with open(examples_path / "4/component.yaml") as f: specification = yaml.safe_load(f) manifest = Manifest(input_manifest) @@ -50,7 +50,4 @@ def test_component_spec_location_update(): component_spec=component_spec, ) - assert ( - evolved_manifest._specification["subsets"]["images"]["location"] - == "/test_pipeline/12345/example_component/images" - ) + assert evolved_manifest.index.location 
== "/" + component_spec.component_folder_name diff --git a/tests/test_schema.py b/tests/core/test_schema.py similarity index 100% rename from tests/test_schema.py rename to tests/core/test_schema.py diff --git a/tests/example_specs/component_specs/valid_component.yaml b/tests/example_specs/component_specs/valid_component.yaml deleted file mode 100644 index c4b99e837..000000000 --- a/tests/example_specs/component_specs/valid_component.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest -tags: - - Data loading - -consumes: - images: - fields: - data: - type: binary - - embeddings: - fields: - data: - type: array - items: - type: float32 - -produces: - captions: - fields: - data: - type: string - -args: - storage_args: - description: Storage arguments - type: str \ No newline at end of file diff --git a/tests/example_specs/components/input_manifest.json b/tests/example_specs/components/input_manifest.json deleted file mode 100644 index 7af13d599..000000000 --- a/tests/example_specs/components/input_manifest.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "metadata": { - "pipeline_name": "test_pipeline", - "base_path": "/bucket", - "run_id": "test_pipeline_12345", - "component_id": "67890" - }, - "index": { - "location": "/index/12345/example_component" - }, - "subsets": { - "images": { - "location": "/images", - "fields": { - "data": { - "type": "binary" - } - } - } - - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/1/output_manifest.json b/tests/example_specs/evolution_examples/1/output_manifest.json deleted file mode 100644 index 17b94c0b0..000000000 --- a/tests/example_specs/evolution_examples/1/output_manifest.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - 
"location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/12345/example_component/images", - "fields":{ - "width":{ - "type":"int32" - }, - "height":{ - "type":"int32" - }, - "data":{ - "type":"binary" - } - } - }, - "captions":{ - "location":"/test_pipeline/12345/example_component/captions", - "fields":{ - "data":{ - "type":"binary" - } - } - }, - "embeddings":{ - "location":"/test_pipeline/custom_run_id/example_component/embeddings", - "fields":{ - "data":{ - "type":"array", - "items":{ - "type":"float32" - } - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/2/component.yaml b/tests/example_specs/evolution_examples/2/component.yaml deleted file mode 100644 index f37ff99d1..000000000 --- a/tests/example_specs/evolution_examples/2/component.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest - -consumes: - images: - fields: - data: - type: binary - additionalSubsets: false - -produces: - embeddings: - fields: - data: - type: array - items: - type: float32 - -args: - storage_args: - description: Storage arguments - type: str diff --git a/tests/example_specs/evolution_examples/2/output_manifest.json b/tests/example_specs/evolution_examples/2/output_manifest.json deleted file mode 100644 index 3a40b1c9d..000000000 --- a/tests/example_specs/evolution_examples/2/output_manifest.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/12345/example_component/images", - "fields":{ - "width":{ - "type":"int32" - }, - "height":{ - "type":"int32" - }, - "data":{ - "type":"binary" - } - } - }, - "embeddings":{ 
- "location":"/test_pipeline/custom_run_id/example_component/embeddings", - "fields":{ - "data":{ - "type":"array", - "items":{ - "type":"float32" - } - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/3/component.yaml b/tests/example_specs/evolution_examples/3/component.yaml deleted file mode 100644 index 6753a083b..000000000 --- a/tests/example_specs/evolution_examples/3/component.yaml +++ /dev/null @@ -1,24 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest - -consumes: - images: - fields: - data: - type: binary - additionalFields: false - additionalSubsets: false - -produces: - embeddings: - fields: - data: - type: array - items: - type: float32 - -args: - storage_args: - description: Storage arguments - type: str diff --git a/tests/example_specs/evolution_examples/3/output_manifest.json b/tests/example_specs/evolution_examples/3/output_manifest.json deleted file mode 100644 index a9abda6d0..000000000 --- a/tests/example_specs/evolution_examples/3/output_manifest.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/12345/example_component/images", - "fields":{ - "data":{ - "type":"binary" - } - } - }, - "embeddings":{ - "location":"/test_pipeline/custom_run_id/example_component/embeddings", - "fields":{ - "data":{ - "type":"array", - "items":{ - "type":"float32" - } - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/4/output_manifest.json b/tests/example_specs/evolution_examples/4/output_manifest.json deleted file mode 100644 index 24af4f2ac..000000000 --- a/tests/example_specs/evolution_examples/4/output_manifest.json +++ /dev/null @@ 
-1,38 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/custom_run_id/example_component/images", - "fields":{ - "width":{ - "type":"int32" - }, - "height":{ - "type":"int32" - }, - "data":{ - "type":"binary" - }, - "encoding":{ - "type":"string" - } - } - }, - "captions":{ - "location":"/test_pipeline/12345/example_component/captions", - "fields":{ - "data":{ - "type":"binary" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/5/component.yaml b/tests/example_specs/evolution_examples/5/component.yaml deleted file mode 100644 index 93aaf68b3..000000000 --- a/tests/example_specs/evolution_examples/5/component.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest - -consumes: - images: - fields: - data: - type: binary - -produces: - images: - fields: - encoding: - type: string - additionalFields: false - -args: - storage_args: - description: Storage arguments - type: str diff --git a/tests/example_specs/evolution_examples/5/output_manifest.json b/tests/example_specs/evolution_examples/5/output_manifest.json deleted file mode 100644 index 8bcf6141d..000000000 --- a/tests/example_specs/evolution_examples/5/output_manifest.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/custom_run_id/example_component/images", - "fields":{ - "encoding":{ - "type":"string" - } - } - }, - "captions":{ - 
"location":"/test_pipeline/12345/example_component/captions", - "fields":{ - "data":{ - "type":"binary" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/6/component.yaml b/tests/example_specs/evolution_examples/6/component.yaml deleted file mode 100644 index 065061791..000000000 --- a/tests/example_specs/evolution_examples/6/component.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest - -consumes: - images: - fields: - data: - type: binary - -produces: - images: - fields: - encoding: - type: string - additionalFields: false - additionalSubsets: false - -args: - storage_args: - description: Storage arguments - type: str diff --git a/tests/example_specs/evolution_examples/6/output_manifest.json b/tests/example_specs/evolution_examples/6/output_manifest.json deleted file mode 100644 index b7521bf66..000000000 --- a/tests/example_specs/evolution_examples/6/output_manifest.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/custom_run_id/example_component/images", - "fields":{ - "encoding":{ - "type":"string" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/7/component.yaml b/tests/example_specs/evolution_examples/7/component.yaml deleted file mode 100644 index 5746ffa4d..000000000 --- a/tests/example_specs/evolution_examples/7/component.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: Example component -description: This is an example component -image: example_component:latest - -consumes: - images: - fields: - data: - type: binary - -produces: - images: - fields: - data: - type: string - additionalFields: false - 
additionalSubsets: false - -args: - storage_args: - description: Storage arguments - type: str diff --git a/tests/example_specs/evolution_examples/7/output_manifest.json b/tests/example_specs/evolution_examples/7/output_manifest.json deleted file mode 100644 index a9eb8a308..000000000 --- a/tests/example_specs/evolution_examples/7/output_manifest.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"custom_run_id", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/custom_run_id/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/custom_run_id/example_component/images", - "fields":{ - "data":{ - "type":"string" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/evolution_examples/8/output_manifest.json b/tests/example_specs/evolution_examples/8/output_manifest.json deleted file mode 100644 index de2621c49..000000000 --- a/tests/example_specs/evolution_examples/8/output_manifest.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "metadata": { - "pipeline_name": "test_pipeline", - "base_path": "gs://bucket", - "run_id": "custom_run_id", - "component_id": "example_component" - }, - "index": { - "location": "/test_pipeline/custom_run_id/example_component/index" - }, - "subsets": { - "images": { - "location": "/test_pipeline/12345/example_component/images", - "fields": { - "width": { - "type": "int32" - }, - "height": { - "type": "int32" - }, - "data": { - "type": "binary" - } - } - }, - "captions": { - "location": "/test_pipeline/12345/example_component/captions", - "fields": { - "data": { - "type": "binary" - } - } - } - } -} diff --git a/tests/example_specs/evolution_examples/input_manifest.json b/tests/example_specs/evolution_examples/input_manifest.json deleted file mode 100644 index 2ecf37243..000000000 --- a/tests/example_specs/evolution_examples/input_manifest.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - 
"metadata":{ - "pipeline_name":"test_pipeline", - "base_path":"gs://bucket", - "run_id":"12345", - "component_id":"example_component" - }, - "index":{ - "location":"/test_pipeline/12345/example_component/index" - }, - "subsets":{ - "images":{ - "location":"/test_pipeline/12345/example_component/images", - "fields":{ - "width":{ - "type":"int32" - }, - "height":{ - "type":"int32" - }, - "data":{ - "type":"binary" - } - } - }, - "captions":{ - "location":"/test_pipeline/12345/example_component/captions", - "fields":{ - "data":{ - "type":"binary" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/manifests/invalid_manifest.json b/tests/example_specs/manifests/invalid_manifest.json deleted file mode 100644 index 3fe8b1097..000000000 --- a/tests/example_specs/manifests/invalid_manifest.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "metadata": { - "base_path": "gs://bucket" - }, - "index": { - "location": "/index" - }, - "subsets": { - "images": { - "location": "/images", - "fields": [] - } - } -} \ No newline at end of file diff --git a/tests/example_specs/manifests/valid_manifest.json b/tests/example_specs/manifests/valid_manifest.json deleted file mode 100644 index 9bc00c512..000000000 --- a/tests/example_specs/manifests/valid_manifest.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "metadata": { - "pipeline_name": "test_pipeline", - "base_path": "gs://bucket", - "run_id": "test_pipeline_12345", - "component_id": "67890" - }, - "index": { - "location": "/index" - }, - "subsets": { - "images": { - "location": "/images", - "fields": { - "data": { - "type": "binary" - }, - "height": { - "type": "int32" - }, - "width": { - "type": "int32" - } - } - }, - "captions": { - "location": "/captions", - "fields": { - "data": { - "type": "binary" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json 
b/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json deleted file mode 100644 index 541775f84..000000000 --- a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "metadata": { - "pipeline_name": "example_pipeline", - "base_path": "tests/example_data/subsets_input/mock_base_path", - "run_id": "example_pipeline_2023", - "component_id": "component_1", - "cache_key": "42" - }, - "index": { - "location": "/index" - }, - "subsets": { - "images": { - "location": "/images", - "fields": { - "data": { - "type": "binary" - }, - "height": { - "type": "int32" - }, - "width": { - "type": "int32" - } - } - }, - "captions": { - "location": "/captions", - "fields": { - "data": { - "type": "binary" - } - } - } - } -} \ No newline at end of file diff --git a/tests/example_component/Dockerfile b/tests/examples/example_component/Dockerfile similarity index 100% rename from tests/example_component/Dockerfile rename to tests/examples/example_component/Dockerfile diff --git a/tests/example_component/fondant_component.yaml b/tests/examples/example_component/fondant_component.yaml similarity index 100% rename from tests/example_component/fondant_component.yaml rename to tests/examples/example_component/fondant_component.yaml diff --git a/tests/example_data/components/1.yaml b/tests/examples/example_data/components/1.yaml similarity index 100% rename from tests/example_data/components/1.yaml rename to tests/examples/example_data/components/1.yaml diff --git a/tests/example_data/manifest.json b/tests/examples/example_data/manifest.json similarity index 100% rename from tests/example_data/manifest.json rename to tests/examples/example_data/manifest.json diff --git a/tests/example_data/raw/split.py b/tests/examples/example_data/raw/split.py similarity index 100% rename from tests/example_data/raw/split.py rename to tests/examples/example_data/raw/split.py diff 
--git a/tests/example_data/raw/testset.parquet b/tests/examples/example_data/raw/testset.parquet similarity index 100% rename from tests/example_data/raw/testset.parquet rename to tests/examples/example_data/raw/testset.parquet diff --git a/tests/example_data/subsets_input/index/part.0.parquet b/tests/examples/example_data/subsets_input/index/part.0.parquet similarity index 100% rename from tests/example_data/subsets_input/index/part.0.parquet rename to tests/examples/example_data/subsets_input/index/part.0.parquet diff --git a/tests/example_data/subsets_input/index/part.1.parquet b/tests/examples/example_data/subsets_input/index/part.1.parquet similarity index 100% rename from tests/example_data/subsets_input/index/part.1.parquet rename to tests/examples/example_data/subsets_input/index/part.1.parquet diff --git a/tests/example_data/subsets_input/index/part.2.parquet b/tests/examples/example_data/subsets_input/index/part.2.parquet similarity index 100% rename from tests/example_data/subsets_input/index/part.2.parquet rename to tests/examples/example_data/subsets_input/index/part.2.parquet diff --git a/tests/example_data/subsets_input/properties/part.0.parquet b/tests/examples/example_data/subsets_input/properties/part.0.parquet similarity index 100% rename from tests/example_data/subsets_input/properties/part.0.parquet rename to tests/examples/example_data/subsets_input/properties/part.0.parquet diff --git a/tests/example_data/subsets_input/properties/part.1.parquet b/tests/examples/example_data/subsets_input/properties/part.1.parquet similarity index 100% rename from tests/example_data/subsets_input/properties/part.1.parquet rename to tests/examples/example_data/subsets_input/properties/part.1.parquet diff --git a/tests/example_data/subsets_input/properties/part.2.parquet b/tests/examples/example_data/subsets_input/properties/part.2.parquet similarity index 100% rename from tests/example_data/subsets_input/properties/part.2.parquet rename to 
tests/examples/example_data/subsets_input/properties/part.2.parquet diff --git a/tests/example_data/subsets_input/types/part.0.parquet b/tests/examples/example_data/subsets_input/types/part.0.parquet similarity index 100% rename from tests/example_data/subsets_input/types/part.0.parquet rename to tests/examples/example_data/subsets_input/types/part.0.parquet diff --git a/tests/example_data/subsets_input/types/part.1.parquet b/tests/examples/example_data/subsets_input/types/part.1.parquet similarity index 100% rename from tests/example_data/subsets_input/types/part.1.parquet rename to tests/examples/example_data/subsets_input/types/part.1.parquet diff --git a/tests/example_data/subsets_input/types/part.2.parquet b/tests/examples/example_data/subsets_input/types/part.2.parquet similarity index 100% rename from tests/example_data/subsets_input/types/part.2.parquet rename to tests/examples/example_data/subsets_input/types/part.2.parquet diff --git a/tests/example_modules/component.py b/tests/examples/example_modules/component.py similarity index 100% rename from tests/example_modules/component.py rename to tests/examples/example_modules/component.py diff --git a/tests/example_modules/invalid_component.py b/tests/examples/example_modules/invalid_component.py similarity index 100% rename from tests/example_modules/invalid_component.py rename to tests/examples/example_modules/invalid_component.py diff --git a/tests/example_modules/invalid_double_components.py b/tests/examples/example_modules/invalid_double_components.py similarity index 100% rename from tests/example_modules/invalid_double_components.py rename to tests/examples/example_modules/invalid_double_components.py diff --git a/tests/example_modules/invalid_double_pipeline.py b/tests/examples/example_modules/invalid_double_pipeline.py similarity index 100% rename from tests/example_modules/invalid_double_pipeline.py rename to tests/examples/example_modules/invalid_double_pipeline.py diff --git 
a/tests/example_modules/pipeline.py b/tests/examples/example_modules/pipeline.py similarity index 100% rename from tests/example_modules/pipeline.py rename to tests/examples/example_modules/pipeline.py diff --git a/tests/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml b/tests/examples/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml similarity index 100% rename from tests/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml rename to tests/examples/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml diff --git a/tests/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml rename to tests/examples/example_pipelines/invalid_pipeline/example_1/first_component/fondant_component.yaml diff --git a/tests/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml rename to tests/examples/example_pipelines/invalid_pipeline/example_1/second_component/fondant_component.yaml diff --git a/tests/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml rename to tests/examples/example_pipelines/invalid_pipeline/example_2/first_component/fondant_component.yaml diff --git a/tests/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml 
b/tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml rename to tests/examples/example_pipelines/invalid_pipeline/example_2/second_component/fondant_component.yaml diff --git a/tests/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml rename to tests/examples/example_pipelines/invalid_pipeline/example_3/first_component/fondant_component.yaml diff --git a/tests/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml b/tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml rename to tests/examples/example_pipelines/invalid_pipeline/example_3/second_component/fondant_component.yaml diff --git a/tests/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile rename to tests/examples/example_pipelines/valid_pipeline/example_1/first_component/Dockerfile diff --git a/tests/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml rename to 
tests/examples/example_pipelines/valid_pipeline/example_1/first_component/fondant_component.yaml diff --git a/tests/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile rename to tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/Dockerfile diff --git a/tests/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml rename to tests/examples/example_pipelines/valid_pipeline/example_1/fourth_component/fondant_component.yaml diff --git a/tests/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile rename to tests/examples/example_pipelines/valid_pipeline/example_1/second_component/Dockerfile diff --git a/tests/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml rename to tests/examples/example_pipelines/valid_pipeline/example_1/second_component/fondant_component.yaml diff --git a/tests/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile b/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile similarity index 100% rename from 
tests/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile rename to tests/examples/example_pipelines/valid_pipeline/example_1/third_component/Dockerfile diff --git a/tests/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml b/tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml similarity index 100% rename from tests/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml rename to tests/examples/example_pipelines/valid_pipeline/example_1/third_component/fondant_component.yaml diff --git a/tests/example_specs/components/arguments/component.yaml b/tests/examples/example_specs/components/arguments/component.yaml similarity index 100% rename from tests/example_specs/components/arguments/component.yaml rename to tests/examples/example_specs/components/arguments/component.yaml diff --git a/tests/example_specs/components/arguments/component_default_args.yaml b/tests/examples/example_specs/components/arguments/component_default_args.yaml similarity index 100% rename from tests/example_specs/components/arguments/component_default_args.yaml rename to tests/examples/example_specs/components/arguments/component_default_args.yaml diff --git a/tests/example_specs/components/arguments/input_manifest.json b/tests/examples/example_specs/components/arguments/input_manifest.json similarity index 60% rename from tests/example_specs/components/arguments/input_manifest.json rename to tests/examples/example_specs/components/arguments/input_manifest.json index d98ddd95b..9ee2494f9 100644 --- a/tests/example_specs/components/arguments/input_manifest.json +++ b/tests/examples/example_specs/components/arguments/input_manifest.json @@ -7,16 +7,12 @@ "cache_key": "00" }, "index": { - "location": "/index" + "location": "/component_1" }, - "subsets": { - "images": { - "location": "/images", - "fields": { - "data": { - "type": "binary" - } - } + "fields": { + "data": { + "type": 
"binary", + "location": "/component_1" } } } \ No newline at end of file diff --git a/tests/example_specs/components/component.yaml b/tests/examples/example_specs/components/component.yaml similarity index 56% rename from tests/example_specs/components/component.yaml rename to tests/examples/example_specs/components/component.yaml index 19c8d5856..973cc3e6b 100644 --- a/tests/example_specs/components/component.yaml +++ b/tests/examples/example_specs/components/component.yaml @@ -3,19 +3,15 @@ description: This is an example component image: example_component:latest consumes: - images: - fields: - data: - type: binary + images_data: + type: binary produces: - embeddings: - fields: - data: - type: array - items: - type: float32 - additionalFields: false + images_data: + type: array + items: + type: float32 +additionalFields: false args: diff --git a/tests/examples/example_specs/components/input_manifest.json b/tests/examples/example_specs/components/input_manifest.json new file mode 100644 index 000000000..80fa0b91d --- /dev/null +++ b/tests/examples/example_specs/components/input_manifest.json @@ -0,0 +1,17 @@ +{ + "metadata": { + "pipeline_name": "test_pipeline", + "base_path": "/bucket", + "run_id": "test_pipeline_12345", + "component_id": "67890" + }, + "index": { + "location": "/example_component" + }, + "fields": { + "data": { + "location": "/example_component", + "type": "binary" + } + } +} \ No newline at end of file diff --git a/tests/example_specs/mock_base_path/example_pipeline/cache/42.txt b/tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt similarity index 100% rename from tests/example_specs/mock_base_path/example_pipeline/cache/42.txt rename to tests/examples/example_specs/mock_base_path/example_pipeline/cache/42.txt diff --git a/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json 
b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json new file mode 100644 index 000000000..47c2fe949 --- /dev/null +++ b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_1/manifest.json @@ -0,0 +1,31 @@ +{ + "metadata": { + "pipeline_name": "example_pipeline", + "base_path": "tests/example_data/subsets_input/mock_base_path", + "run_id": "example_pipeline_2023", + "component_id": "component_1", + "cache_key": "42" + }, + "index": { + "location": "/component_1" + }, + "fields": + { + "data": { + "type": "binary", + "location": "/component_1" + }, + "height": { + "type": "int32", + "location": "/component_1" + }, + "width": { + "type": "int32", + "location": "/component_1" + }, + "captions": { + "type": "string", + "location": "/component_1" + } + } +} \ No newline at end of file diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json similarity index 100% rename from tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json rename to tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2023/component_2/manifest.json diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json similarity index 100% rename from tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json rename to tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_1/manifest.json diff --git a/tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json 
b/tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json similarity index 100% rename from tests/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json rename to tests/examples/example_specs/mock_base_path/example_pipeline/example_pipeline_2024/component_2/manifest.json diff --git a/tests/test_component.py b/tests/test_component.py index e759bd367..e5dcb3bc3 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -377,38 +377,22 @@ def test_wrap_transform(): "description": "Component for testing", "image": "component:test", "consumes": { - "image": { - "fields": { - "height": { - "type": "int16", - }, - "width": { - "type": "int16", - }, - }, + "image_height": { + "type": "int16", }, - "caption": { - "fields": { - "text": { - "type": "string", - }, - }, + "image_width": { + "type": "int16", + }, + "caption_text": { + "type": "string", }, }, "produces": { - "caption": { - "fields": { - "text": { - "type": "string", - }, - }, + "caption_text": { + "type": "string", }, - "image": { - "fields": { - "height": { - "type": "int16", - }, - }, + "image_height": { + "type": "int16", }, }, }, @@ -425,9 +409,9 @@ def test_wrap_transform(): def transform(dataframe: pd.DataFrame) -> pd.DataFrame: # Check hierarchical columns assert dataframe.columns.tolist() == [ - ("image", "height"), - ("image", "width"), - ("caption", "text"), + "image_height", + "image_width", + "caption_text", ] return dataframe diff --git a/tests/test_manifest.py b/tests/test_manifest.py deleted file mode 100644 index 3af3ea425..000000000 --- a/tests/test_manifest.py +++ /dev/null @@ -1,239 +0,0 @@ -import json -import pkgutil -from pathlib import Path - -import pytest -from fondant.core.exceptions import InvalidManifest -from fondant.core.manifest import Field, Index, Manifest, Subset, Type - -manifest_path = Path(__file__).parent / "example_specs/manifests" - - -@pytest.fixture() -def 
valid_manifest(): - with open(manifest_path / "valid_manifest.json") as f: - return json.load(f) - - -@pytest.fixture() -def invalid_manifest(): - with open(manifest_path / "invalid_manifest.json") as f: - return json.load(f) - - -def test_manifest_validation(valid_manifest, invalid_manifest): - """Test that the manifest is validated correctly on instantiation.""" - Manifest(valid_manifest) - with pytest.raises(InvalidManifest): - Manifest(invalid_manifest) - - -def test_subset_init(): - """Test initializing a subset.""" - subset_spec = { - "location": "/images/ABC/123", - "fields": { - "data": { - "type": "binary", - }, - }, - } - subset = Subset(specification=subset_spec, base_path="/tmp") - assert subset.location == "/tmp/images/ABC/123" - assert ( - subset.__repr__() - == "Subset({'location': '/images/ABC/123', 'fields': {'data': {'type': 'binary'}}})" - ) - - -def test_subset_fields(): - """Test manipulating subset fields.""" - subset_spec = { - "location": "/images/ABC/123", - "fields": { - "data": { - "type": "binary", - }, - }, - } - subset = Subset(specification=subset_spec, base_path="/tmp") - - # add a field - subset.add_field(name="data2", type_=Type("binary")) - assert "data2" in subset.fields - - # add a duplicate field - with pytest.raises(ValueError, match="A field with name data2 already exists"): - subset.add_field(name="data2", type_=Type("binary")) - - # add a duplicate field but overwrite - subset.add_field(name="data2", type_=Type("string"), overwrite=True) - assert subset.fields["data2"].type == Type("string") - - # remove a field - subset.remove_field(name="data2") - assert "data2" not in subset.fields - - -def test_set_base_path(valid_manifest): - """Test altering the base path in the manifest.""" - manifest = Manifest(valid_manifest) - tmp_path = "/tmp/base_path" - manifest.update_metadata(key="base_path", value=tmp_path) - - assert manifest.base_path == tmp_path - assert manifest._specification["metadata"]["base_path"] == tmp_path - - 
-def test_from_to_file(valid_manifest): - """Test reading from and writing to file.""" - tmp_path = "/tmp/manifest.json" - with open(tmp_path, "w", encoding="utf-8") as f: - json.dump(valid_manifest, f) - - manifest = Manifest.from_file(tmp_path) - assert manifest.metadata == valid_manifest["metadata"] - - manifest.to_file(tmp_path) - with open(tmp_path, encoding="utf-8") as f: - assert json.load(f) == valid_manifest - - -def test_attribute_access(valid_manifest): - """ - Test that attributes can be accessed as expected: - - Fixed properties should be accessible as an attribute - - Dynamic properties should be accessible by lookup. - """ - manifest = Manifest(valid_manifest) - - assert manifest.metadata == valid_manifest["metadata"] - assert manifest.index.location == "gs://bucket/index" - assert manifest.subsets["images"].location == "gs://bucket/images" - assert manifest.subsets["images"].fields["data"].type == Type("binary") - - -def test_manifest_creation(): - """Test the stepwise creation of a manifest via the Manifest class.""" - base_path = "gs://bucket" - run_id = "run_id" - pipeline_name = "pipeline_name" - component_id = "component_id" - cache_key = "42" - - manifest = Manifest.create( - pipeline_name=pipeline_name, - base_path=base_path, - run_id=run_id, - component_id=component_id, - cache_key=cache_key, - ) - - manifest.add_subset("images", [("width", Type("int32")), ("height", Type("int32"))]) - manifest.subsets["images"].add_field("data", Type("binary")) - - assert manifest._specification == { - "metadata": { - "pipeline_name": pipeline_name, - "base_path": base_path, - "run_id": run_id, - "component_id": component_id, - "cache_key": cache_key, - }, - "index": {"location": f"/{pipeline_name}/{run_id}/{component_id}/index"}, - "subsets": { - "images": { - "location": f"/{pipeline_name}/{run_id}/{component_id}/images", - "fields": { - "width": { - "type": "int32", - }, - "height": { - "type": "int32", - }, - "data": { - "type": "binary", - }, - }, - }, 
- }, - } - - -def test_manifest_repr(): - manifest = Manifest.create( - pipeline_name="NAME", - base_path="/", - run_id="A", - component_id="1", - cache_key="42", - ) - assert ( - manifest.__repr__() - == "Manifest({'metadata': {'base_path': '/', 'pipeline_name': 'NAME', 'run_id': 'A'," - " 'component_id': '1', 'cache_key': '42'}," - " 'index': {'location': '/NAME/A/1/index'}, 'subsets': {}})" - ) - - -def test_manifest_alteration(valid_manifest): - """Test alteration functionalities of a manifest via the Manifest class.""" - manifest = Manifest(valid_manifest) - - # test adding a subset - manifest.add_subset( - "images2", - [("width", Type("int32")), ("height", Type("int32"))], - ) - assert "images2" in manifest.subsets - - # test adding a duplicate subset - with pytest.raises(ValueError, match="A subset with name images2 already exists"): - manifest.add_subset( - "images2", - [("width", Type("int32")), ("height", Type("int32"))], - ) - - # test removing a subset - manifest.remove_subset("images2") - assert "images2" not in manifest.subsets - - # test removing a nonexistant subset - with pytest.raises(ValueError, match="Subset pictures not found in specification"): - manifest.remove_subset("pictures") - - -def test_manifest_copy_and_adapt(valid_manifest): - """Test that a manifest can be copied and adapted without changing the original.""" - manifest = Manifest(valid_manifest) - new_manifest = manifest.copy() - new_manifest.remove_subset("images") - assert manifest._specification == valid_manifest - assert new_manifest._specification != valid_manifest - - -def test_no_validate_schema(monkeypatch, valid_manifest): - monkeypatch.setattr(pkgutil, "get_data", lambda package, resource: None) - with pytest.raises(FileNotFoundError): - Manifest(valid_manifest) - - -def test_index_fields(): - """Test that the fields property of Index returns the expected fields.""" - subset_spec = { - "location": "/images/ABC/123", - "fields": { - "data": { - "type": "binary", - }, - }, - 
} - - index = Index(specification=subset_spec, base_path="/tmp") - - expected_fields = { - "id": Field(name="id", type=Type("string")), - "source": Field(name="source", type=Type("string")), - } - - assert index.fields == expected_fields