From 8b9713a945c9be0804d92dfe10732d8c7100c5bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Georges=20Lorr=C3=A9?=
<35808396+GeorgesLorre@users.noreply.github.com>
Date: Tue, 30 Jan 2024 15:37:52 +0100
Subject: [PATCH] Fix Readme script generation (#821)
This fixes the failing builds but is it what we want ?
---
components/caption_images/README.md | 2 +-
scripts/component_readme/generate_readme.py | 24 ++++++++++++---------
scripts/component_readme/readme_template.md | 16 +++++++-------
src/fondant/component/data_io.py | 2 +-
src/fondant/core/component_spec.py | 12 +++++++----
src/fondant/core/manifest.py | 2 +-
src/fondant/pipeline/compiler.py | 2 +-
src/fondant/pipeline/pipeline.py | 2 +-
tests/component/test_data_io.py | 7 ++++--
tests/core/test_component_specs.py | 2 +-
tests/core/test_manifest_evolution.py | 2 +-
tests/pipeline/test_pipeline.py | 2 +-
tests/pipeline/test_python_component.py | 8 +++----
13 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/components/caption_images/README.md b/components/caption_images/README.md
index 2158b173b..1ba005318 100644
--- a/components/caption_images/README.md
+++ b/components/caption_images/README.md
@@ -49,7 +49,7 @@ pipeline = Pipeline(...)
dataset = pipeline.read(...)
dataset = dataset.apply(
- "caption_images",
+ "",
arguments={
# Add arguments
# "model_id": "Salesforce/blip-image-captioning-base",
diff --git a/scripts/component_readme/generate_readme.py b/scripts/component_readme/generate_readme.py
index 587eb821d..b26953ee2 100644
--- a/scripts/component_readme/generate_readme.py
+++ b/scripts/component_readme/generate_readme.py
@@ -2,6 +2,7 @@
from pathlib import Path
import jinja2
+
from fondant.core.component_spec import ComponentSpec
@@ -11,24 +12,25 @@ def read_component_spec(component_spec_path: Path) -> ComponentSpec:
def generate_readme(component_spec: ComponentSpec, *, component_dir: Path) -> str:
env = jinja2.Environment(
- loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent),
- trim_blocks=True
+ loader=jinja2.loaders.FileSystemLoader(Path(__file__).parent), trim_blocks=True
)
env.filters["eval"] = eval
template = env.get_template("readme_template.md")
return template.render(
- id=component_dir.name,
+ component_id=component_spec.safe_name,
name=component_spec.name,
- component_folder_name=component_spec.component_folder_name,
description=component_spec.description,
consumes=component_spec.consumes,
produces=component_spec.produces,
is_consumes_generic=component_spec.is_generic("consumes"),
is_produces_generic=component_spec.is_generic("produces"),
- arguments=[arg for arg in component_spec.args.values()
- if arg.name not in component_spec.default_arguments],
+ arguments=[
+ arg
+ for arg in component_spec.args.values()
+ if arg.name not in component_spec.default_arguments
+ ],
tests=(component_dir / "tests").exists(),
tags=component_spec.tags,
)
@@ -48,10 +50,12 @@ def main(component_spec_path: Path):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
- parser.add_argument("component_specs",
- nargs="+",
- type=Path,
- help="Path to the component spec to generate a readme from")
+ parser.add_argument(
+ "component_specs",
+ nargs="+",
+ type=Path,
+ help="Path to the component spec to generate a readme from",
+ )
args = parser.parse_args()
for spec in args.component_specs:
diff --git a/scripts/component_readme/readme_template.md b/scripts/component_readme/readme_template.md
index 5907bc8a6..42244bab0 100644
--- a/scripts/component_readme/readme_template.md
+++ b/scripts/component_readme/readme_template.md
@@ -1,13 +1,13 @@
# {{ name }}
-
+
## Description
{{ description }}
-
+
## Inputs / outputs
-
+
### Consumes
{% if consumes %}
**This component consumes:**
@@ -33,7 +33,7 @@ See the usage example below on how to define a field name for additional fields.
{% endif %}
-
+
### Produces
{% if produces %}
**This component produces:**
@@ -55,7 +55,7 @@ the type of the field that should be used to write the output dataset.
**This component does not produce data.**
{% endif %}
-
+
## Arguments
{% if arguments %}
@@ -70,7 +70,7 @@ The component takes the following arguments to alter its behavior:
This component takes no arguments.
{% endif %}
-
+
## Usage
You can add this component to your pipeline using the following code:
@@ -94,7 +94,7 @@ dataset = dataset.apply(...)
dataset.write(
{% endif %}
{% endif %}
- "{{ id }}",
+ "{{ component_id }}",
arguments={
# Add arguments
{% for argument in arguments %}
@@ -121,7 +121,7 @@ dataset.write(
```
{% if tests %}
-
+
## Testing
You can run the tests using docker with BuildKit. From this directory, run:
diff --git a/src/fondant/component/data_io.py b/src/fondant/component/data_io.py
index 5e853c615..c3b7bfd06 100644
--- a/src/fondant/component/data_io.py
+++ b/src/fondant/component/data_io.py
@@ -199,7 +199,7 @@ def _write_dataframe(self, dataframe: dd.DataFrame) -> dd.core.Scalar:
"""Create dataframe writing task."""
location = (
f"{self.manifest.base_path}/{self.manifest.pipeline_name}/"
- f"{self.manifest.run_id}/{self.operation_spec.component_folder_name}"
+ f"{self.manifest.run_id}/{self.operation_spec.component_name}"
)
schema = {
diff --git a/src/fondant/core/component_spec.py b/src/fondant/core/component_spec.py
index 886039e5a..27fffce7c 100644
--- a/src/fondant/core/component_spec.py
+++ b/src/fondant/core/component_spec.py
@@ -100,7 +100,7 @@ def __init__(
tags: t.Optional[t.List[str]] = None,
):
spec_dict: t.Dict[str, t.Any] = {
- "name": self.sanitized_component_name(name),
+ "name": name,
"image": image,
}
@@ -179,6 +179,10 @@ def from_dict(cls, component_spec_dict: t.Dict[str, t.Any]) -> "ComponentSpec":
def name(self):
return self._specification["name"]
+ @property
+ def safe_name(self):
+ return self.sanitized_component_name(self._specification["name"])
+
def sanitized_component_name(self, name) -> str:
"""Cleans and converts a component name."""
return name.lower().replace(" ", "_")
@@ -516,9 +520,9 @@ def outer_produces(self) -> t.Mapping[str, Field]:
return self._outer_produces
@property
- def component_folder_name(self) -> str:
- """Get the component folder name."""
- return self._component_spec.name
+ def component_name(self) -> str:
+ """Get the component name."""
+ return self._component_spec.safe_name
@property
def previous_index(self) -> t.Optional[str]:
diff --git a/src/fondant/core/manifest.py b/src/fondant/core/manifest.py
index 15ecf87d7..7640f97a4 100644
--- a/src/fondant/core/manifest.py
+++ b/src/fondant/core/manifest.py
@@ -256,7 +256,7 @@ def evolve( # : PLR0912 (too many branches)
evolved_manifest = self.copy()
# Update `run_id` and `component_id` in the metadata
- component_id = operation_spec.component_folder_name
+ component_id = operation_spec.component_name
evolved_manifest.update_metadata(key="component_id", value=component_id)
evolved_manifest.update_metadata(key="run_id", value=run_id)
diff --git a/src/fondant/pipeline/compiler.py b/src/fondant/pipeline/compiler.py
index 5f06fa5d9..155c371c9 100644
--- a/src/fondant/pipeline/compiler.py
+++ b/src/fondant/pipeline/compiler.py
@@ -358,7 +358,7 @@ def from_fondant_component_spec(
re.sub(
"-+",
"-",
- re.sub("[^-0-9a-z]+", "-", fondant_component.name.lower()),
+ re.sub("[^-0-9a-z]+", "-", fondant_component.safe_name.lower()),
)
.lstrip("-")
.rstrip("-")
diff --git a/src/fondant/pipeline/pipeline.py b/src/fondant/pipeline/pipeline.py
index 38a35d024..8720cd83b 100644
--- a/src/fondant/pipeline/pipeline.py
+++ b/src/fondant/pipeline/pipeline.py
@@ -305,7 +305,7 @@ def _get_registry_path(name: str) -> Path:
@property
def component_name(self) -> str:
- return self.component_spec.name
+ return self.component_spec.safe_name
def get_component_cache_key(
self,
diff --git a/tests/component/test_data_io.py b/tests/component/test_data_io.py
index 85f9d9a55..ec733d3cc 100644
--- a/tests/component/test_data_io.py
+++ b/tests/component/test_data_io.py
@@ -137,7 +137,10 @@ def test_write_dataset(
data_writer.write_dataframe(dataframe, dask_client)
# read written data and assert
dataframe = dd.read_parquet(
- temp_dir / manifest.pipeline_name / manifest.run_id / component_spec.name,
+ temp_dir
+ / manifest.pipeline_name
+ / manifest.run_id
+ / component_spec.safe_name,
)
assert len(dataframe) == NUMBER_OF_TEST_ROWS
assert list(dataframe.columns) == columns
@@ -178,7 +181,7 @@ def test_write_dataset_custom_produces(
temp_dir
/ manifest.pipeline_name
/ manifest.run_id
- / component_spec_produces.name,
+ / component_spec_produces.safe_name,
)
assert len(dataframe) == NUMBER_OF_TEST_ROWS
assert list(dataframe.columns) == expected_columns
diff --git a/tests/core/test_component_specs.py b/tests/core/test_component_specs.py
index 179a03b9d..0f81ae265 100644
--- a/tests/core/test_component_specs.py
+++ b/tests/core/test_component_specs.py
@@ -91,7 +91,7 @@ def test_component_spec_no_args(valid_fondant_schema_no_args):
"""Test that a component spec without args is supported."""
fondant_component = ComponentSpec.from_dict(valid_fondant_schema_no_args)
- assert fondant_component.name == "example_component"
+ assert fondant_component.name == "Example component"
assert fondant_component.description == "This is an example component"
assert fondant_component.args == fondant_component.default_arguments
diff --git a/tests/core/test_manifest_evolution.py b/tests/core/test_manifest_evolution.py
index 33d5bdbca..b8e03ef0e 100644
--- a/tests/core/test_manifest_evolution.py
+++ b/tests/core/test_manifest_evolution.py
@@ -135,5 +135,5 @@ def test_component_spec_location_update():
)
assert evolved_manifest.index.location.endswith(
- component_spec.name,
+ component_spec.safe_name,
)
diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
index 48abf51ee..3360c7a42 100644
--- a/tests/pipeline/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -93,7 +93,7 @@ def load(self) -> dd.DataFrame:
component = ComponentOp.from_ref(Foo, produces={"bar": pa.string()})
assert component.component_spec._specification == {
- "name": "foo",
+ "name": "Foo",
"image": fondant_image_name,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
diff --git a/tests/pipeline/test_python_component.py b/tests/pipeline/test_python_component.py
index 22f5f208f..da018a6ae 100644
--- a/tests/pipeline/test_python_component.py
+++ b/tests/pipeline/test_python_component.py
@@ -94,7 +94,7 @@ def load(self) -> dd.DataFrame:
].operation_spec.to_dict()
assert operation_spec_dict == {
"specification": {
- "name": "createdata",
+ "name": "CreateData",
"image": "python:3.8-slim-buster",
"description": "lightweight component",
"consumes": {"additionalProperties": True},
@@ -138,7 +138,7 @@ def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
operation_spec_dict = pipeline._graph["addn"]["operation"].operation_spec.to_dict()
assert operation_spec_dict == {
"specification": {
- "name": "addn",
+ "name": "AddN",
"image": default_fondant_image,
"description": "lightweight component",
"consumes": {"additionalProperties": True},
@@ -200,7 +200,7 @@ def load(self) -> dd.DataFrame:
assert operation_spec_without_image == {
"specification": {
- "name": "createdata",
+ "name": "CreateData",
"image": "python:3.8-slim-buster",
"description": "lightweight component",
"consumes": {"additionalProperties": True},
@@ -289,7 +289,7 @@ def load(self) -> dd.DataFrame:
assert operation_spec_without_image == {
"specification": {
- "name": "createdata",
+ "name": "CreateData",
"image": default_fondant_image,
"description": "lightweight component",
"consumes": {"additionalProperties": True},