Skip to content

Commit

Permalink
Merge branch 'main' into bugfix-dataexplorer-images
Browse files: browse the repository at this point in the history
  • Loading branch information
PhilippeMoussalli committed Aug 23, 2023
2 parents cef20eb + 50f3a97 commit 1694d8a
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 45 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ wheels/
*.egg-info/
.installed.cfg
*.egg
.tox
MANIFEST

# DS_Store (MacOS)
Expand All @@ -85,8 +86,12 @@ MANIFEST
# kubeflow artifacts
*.tgz

# Coverage files
*.coverage

# docker artifacts
examples/pipelines/**/docker-compose.yml
examples/pipelines/**/pipeline.yaml

# yaml-e files
*.yaml-e
3 changes: 2 additions & 1 deletion components/load_from_hf_hub/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def load(self) -> dd.DataFrame:
# 4) Optional: only return a specific number of rows
if self.n_rows_to_load is not None:
partitions_length = 0
for npartitions, partition in enumerate(dask_df.partitions):
npartitions = 1
for npartitions, partition in enumerate(dask_df.partitions, start=1):
if partitions_length >= self.n_rows_to_load:
logger.info(f"""Required number of partitions to load\n
{self.n_rows_to_load} is {npartitions}""")
Expand Down
1 change: 0 additions & 1 deletion data_explorer/app/numeric_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
from typing import List

import dask
import dask.dataframe as dd
import pandas as pd
import streamlit as st
Expand Down
25 changes: 15 additions & 10 deletions examples/pipelines/finetune_stable_diffusion/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

logger = logging.getLogger(__name__)
# General configs
pipeline_name = "Test fondant pipeline"
pipeline_description = "A test pipeline"
pipeline_name = "stable_diffusion_pipeline"
pipeline_description = (
"Pipeline to prepare and collect data for finetuning stable diffusion"
)

load_component_column_mapping = {"image": "images_data", "text": "captions_data"}

Expand All @@ -25,7 +27,7 @@
"dataset_name": "logo-wizard/modern-logo-dataset",
"column_name_mapping": load_component_column_mapping,
"image_column_names": ["image"],
"nb_rows_to_load": None,
"n_rows_to_load": None,
},
)

Expand Down Expand Up @@ -81,12 +83,15 @@
number_of_gpus=1,
)

pipeline = Pipeline(pipeline_name=pipeline_name, base_path=PipelineConfigs.BASE_PATH)
pipeline = Pipeline(
pipeline_name=pipeline_name,
base_path="/home/philippe/Scripts/express/local_artifact/new",
)

pipeline.add_op(load_from_hub_op)
pipeline.add_op(image_resolution_extraction_op, dependencies=load_from_hub_op)
pipeline.add_op(image_embedding_op, dependencies=image_resolution_extraction_op)
pipeline.add_op(laion_retrieval_op, dependencies=image_embedding_op)
pipeline.add_op(download_images_op, dependencies=laion_retrieval_op)
pipeline.add_op(caption_images_op, dependencies=download_images_op)
pipeline.add_op(write_to_hub, dependencies=caption_images_op)
# pipeline.add_op(image_resolution_extraction_op, dependencies=load_from_hub_op)
# pipeline.add_op(image_embedding_op, dependencies=image_resolution_extraction_op)
# pipeline.add_op(laion_retrieval_op, dependencies=image_embedding_op)
# pipeline.add_op(download_images_op, dependencies=laion_retrieval_op)
# pipeline.add_op(caption_images_op, dependencies=download_images_op)
# pipeline.add_op(write_to_hub, dependencies=caption_images_op)
5 changes: 3 additions & 2 deletions src/fondant/component_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,9 @@ def from_fondant_component_spec(
"container": {
"image": fondant_component.image,
"command": [
"python3",
"main.py",
"fondant",
"execute",
"main",
"--input_manifest_path",
{"inputPath": "input_manifest_path"},
"--metadata",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ spec:
- container:
args: []
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- /tmp/inputs/input_manifest_path/data
- --metadata
Expand Down Expand Up @@ -54,10 +55,10 @@ spec:
"{\"base_path\": \"/foo/bar\", \"pipeline_name\": \"test_pipeline\", \"run_id\":
\"test_pipeline-20230101000000\", \"component_id\": \"first_component\"}",
"storage_args": "a dummy string arg"}'
pipelines.kubeflow.org/component_ref: '{"digest": "2a304ce49a15404ba50dfd8b56ec43fa8ac8c29f80579d1c8fb974d3f1a5c87f"}'
pipelines.kubeflow.org/component_ref: '{"digest": "c53791e5eba77643348ea14b01bc20f273c32d827f5f1b1b896ef6965fd12d82"}'
pipelines.kubeflow.org/component_spec: '{"description": "This is an example
component", "implementation": {"container": {"command": ["python3", "main.py",
"--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
component", "implementation": {"container": {"command": ["fondant", "execute",
"main", "--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
{"inputValue": "metadata"}, "--component_spec", {"inputValue": "component_spec"},
"--input_partition_rows", {"inputValue": "input_partition_rows"}, "--storage_args",
{"inputValue": "storage_args"}, "--output_manifest_path", {"outputPath":
Expand All @@ -84,8 +85,9 @@ spec:
- container:
args: []
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- /tmp/inputs/input_manifest_path/data
- --metadata
Expand Down Expand Up @@ -120,10 +122,10 @@ spec:
\"pipeline_name\": \"test_pipeline\", \"run_id\": \"test_pipeline-20230101000000\",
\"component_id\": \"second_component\"}", "storage_args": "a dummy string
arg"}'
pipelines.kubeflow.org/component_ref: '{"digest": "a02b0189397a2d9318982201f020dbbbe3962427ed150fe58cc69ff508cc68bb"}'
pipelines.kubeflow.org/component_ref: '{"digest": "455aeccd323115d9caae33621d3ecf5ad4de86da321f97c3761f77bc962f7fc2"}'
pipelines.kubeflow.org/component_spec: '{"description": "This is an example
component", "implementation": {"container": {"command": ["python3", "main.py",
"--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
component", "implementation": {"container": {"command": ["fondant", "execute",
"main", "--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
{"inputValue": "metadata"}, "--component_spec", {"inputValue": "component_spec"},
"--input_partition_rows", {"inputValue": "input_partition_rows"}, "--storage_args",
{"inputValue": "storage_args"}, "--output_manifest_path", {"outputPath":
Expand Down Expand Up @@ -171,8 +173,9 @@ spec:
- container:
args: []
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- /tmp/inputs/input_manifest_path/data
- --metadata
Expand Down Expand Up @@ -210,10 +213,10 @@ spec:
"None", "metadata": "{\"base_path\": \"/foo/bar\", \"pipeline_name\": \"test_pipeline\",
\"run_id\": \"test_pipeline-20230101000000\", \"component_id\": \"third_component\"}",
"storage_args": "a dummy string arg"}'
pipelines.kubeflow.org/component_ref: '{"digest": "698791c6aa2ed14d4b337840116a7a995f403e5be414389b05ccf7942b9e4437"}'
pipelines.kubeflow.org/component_ref: '{"digest": "4e728e3a6242c68816de163eb5ec0398940c5fb6746adf57223ca595103e6c2a"}'
pipelines.kubeflow.org/component_spec: '{"description": "This is an example
component", "implementation": {"container": {"command": ["python3", "main.py",
"--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
component", "implementation": {"container": {"command": ["fondant", "execute",
"main", "--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
{"inputValue": "metadata"}, "--component_spec", {"inputValue": "component_spec"},
"--input_partition_rows", {"inputValue": "input_partition_rows"}, "--storage_args",
{"inputValue": "storage_args"}, "--output_manifest_path", {"outputPath":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ spec:
- container:
args: []
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- /tmp/inputs/input_manifest_path/data
- --metadata
Expand Down Expand Up @@ -54,10 +55,10 @@ spec:
"{\"base_path\": \"/foo/bar\", \"pipeline_name\": \"test_pipeline\", \"run_id\":
\"test_pipeline-20230101000000\", \"component_id\": \"first_component\"}",
"storage_args": "a dummy string arg"}'
pipelines.kubeflow.org/component_ref: '{"digest": "2a304ce49a15404ba50dfd8b56ec43fa8ac8c29f80579d1c8fb974d3f1a5c87f"}'
pipelines.kubeflow.org/component_ref: '{"digest": "c53791e5eba77643348ea14b01bc20f273c32d827f5f1b1b896ef6965fd12d82"}'
pipelines.kubeflow.org/component_spec: '{"description": "This is an example
component", "implementation": {"container": {"command": ["python3", "main.py",
"--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
component", "implementation": {"container": {"command": ["fondant", "execute",
"main", "--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
{"inputValue": "metadata"}, "--component_spec", {"inputValue": "component_spec"},
"--input_partition_rows", {"inputValue": "input_partition_rows"}, "--storage_args",
{"inputValue": "storage_args"}, "--output_manifest_path", {"outputPath":
Expand All @@ -84,8 +85,9 @@ spec:
- container:
args: []
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- /tmp/inputs/input_manifest_path/data
- --metadata
Expand Down Expand Up @@ -132,10 +134,10 @@ spec:
"cropping_threshold": "0", "input_partition_rows": "None", "metadata": "{\"base_path\":
\"/foo/bar\", \"pipeline_name\": \"test_pipeline\", \"run_id\": \"test_pipeline-20230101000000\",
\"component_id\": \"image_cropping\"}", "padding": "0"}'
pipelines.kubeflow.org/component_ref: '{"digest": "e86f02b6b9cc878b6187e44bb3caf9291c3ce42c1939e19b0a97dacdc78a9d72"}'
pipelines.kubeflow.org/component_ref: '{"digest": "8a3c2b5736cf8297ad5848ec043987aed42c6fb12e6e26db25b922467b4d2d7f"}'
pipelines.kubeflow.org/component_spec: '{"description": "Component that removes
single-colored borders around images and crops them appropriately", "implementation":
{"container": {"command": ["python3", "main.py", "--input_manifest_path",
{"container": {"command": ["fondant", "execute", "main", "--input_manifest_path",
{"inputPath": "input_manifest_path"}, "--metadata", {"inputValue": "metadata"},
"--component_spec", {"inputValue": "component_spec"}, "--input_partition_rows",
{"inputValue": "input_partition_rows"}, "--cropping_threshold", {"inputValue":
Expand Down
11 changes: 6 additions & 5 deletions tests/example_pipelines/compiled_pipeline/kubeflow_pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ spec:
- container:
args: []
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- /tmp/inputs/input_manifest_path/data
- --metadata
Expand Down Expand Up @@ -57,10 +58,10 @@ spec:
"{\"base_path\": \"/foo/bar\", \"pipeline_name\": \"test_pipeline\", \"run_id\":
\"test_pipeline-20230101000000\", \"component_id\": \"first_component\"}",
"storage_args": "a dummy string arg"}'
pipelines.kubeflow.org/component_ref: '{"digest": "2a304ce49a15404ba50dfd8b56ec43fa8ac8c29f80579d1c8fb974d3f1a5c87f"}'
pipelines.kubeflow.org/component_ref: '{"digest": "c53791e5eba77643348ea14b01bc20f273c32d827f5f1b1b896ef6965fd12d82"}'
pipelines.kubeflow.org/component_spec: '{"description": "This is an example
component", "implementation": {"container": {"command": ["python3", "main.py",
"--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
component", "implementation": {"container": {"command": ["fondant", "execute",
"main", "--input_manifest_path", {"inputPath": "input_manifest_path"}, "--metadata",
{"inputValue": "metadata"}, "--component_spec", {"inputValue": "component_spec"},
"--input_partition_rows", {"inputValue": "input_partition_rows"}, "--storage_args",
{"inputValue": "storage_args"}, "--output_manifest_path", {"outputPath":
Expand Down
5 changes: 3 additions & 2 deletions tests/example_specs/component_specs/kubeflow_component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ implementation:
container:
image: example_component:latest
command:
- python3
- main.py
- fondant
- execute
- main
- --input_manifest_path
- inputPath: input_manifest_path
- --metadata
Expand Down

0 comments on commit 1694d8a

Please sign in to comment.