diff --git a/components/caption_images/fondant_component.yaml b/components/caption_images/fondant_component.yaml
index 254d8a22c..24a9f6815 100644
--- a/components/caption_images/fondant_component.yaml
+++ b/components/caption_images/fondant_component.yaml
@@ -18,9 +18,12 @@ args:
   model_id:
     description: id of the model on the Hugging Face hub
     type: str
+    default: "Salesforce/blip-image-captioning-base"
   batch_size:
     description: batch size to use
     type: int
+    default: 8
   max_new_tokens:
     description: maximum token length of each caption
-    type: int
\ No newline at end of file
+    type: int
+    default: 50
\ No newline at end of file
diff --git a/components/embedding_based_laion_retrieval/fondant_component.yaml b/components/embedding_based_laion_retrieval/fondant_component.yaml
index f3bab6fb6..0380ba526 100644
--- a/components/embedding_based_laion_retrieval/fondant_component.yaml
+++ b/components/embedding_based_laion_retrieval/fondant_component.yaml
@@ -24,6 +24,8 @@ args:
   aesthetic_score:
     description: Aesthetic embedding to add to the query embedding, between 0 and 9 (higher is prettier).
     type: int
+    default: 9
   aesthetic_weight:
     description: Weight of the aesthetic embedding when added to the query, between 0 and 1
-    type: float
\ No newline at end of file
+    type: float
+    default: 0.5
\ No newline at end of file
diff --git a/components/filter_comments/fondant_component.yaml b/components/filter_comments/fondant_component.yaml
index 383c90f63..4368798b0 100644
--- a/components/filter_comments/fondant_component.yaml
+++ b/components/filter_comments/fondant_component.yaml
@@ -13,6 +13,8 @@ args:
   min_comments_ratio:
     description: The minimum code to comment ratio
     type: float
+    default: 0.1
   max_comments_ratio:
     description: The maximum code to comment ratio
-    type: float
\ No newline at end of file
+    type: float
+    default: 0.9
\ No newline at end of file
diff --git a/components/image_cropping/fondant_component.yaml b/components/image_cropping/fondant_component.yaml
index 457df2405..b21a7ae1d 100644
--- a/components/image_cropping/fondant_component.yaml
+++ b/components/image_cropping/fondant_component.yaml
@@ -22,6 +22,8 @@ args:
   cropping_threshold:
     description: Threshold parameter used for detecting borders. A lower (negative) parameter results in a more performant border detection, but can cause overcropping. Default is -30
     type: int
+    default: -30
   padding:
     description: Padding for the image cropping. The padding is added to all borders of the image.
     type: int
+    default: 10
diff --git a/components/image_cropping/src/main.py b/components/image_cropping/src/main.py
index 318c4d6b9..2be401db8 100644
--- a/components/image_cropping/src/main.py
+++ b/components/image_cropping/src/main.py
@@ -34,8 +34,8 @@ def transform(
         self,
         *,
         dataframe: dd.DataFrame,
-        cropping_threshold: int = -30,
-        padding: int = 10,
+        cropping_threshold: int,
+        padding: int,
     ) -> dd.DataFrame:
         """
         Args:
diff --git a/components/image_embedding/fondant_component.yaml b/components/image_embedding/fondant_component.yaml
index b78ff8047..e4bd7a9c6 100644
--- a/components/image_embedding/fondant_component.yaml
+++ b/components/image_embedding/fondant_component.yaml
@@ -20,6 +20,8 @@ args:
   model_id:
     description: Model id on the Hugging Face hub (e.g. "openai/clip-vit-large-patch14")
     type: str
+    default: "openai/clip-vit-large-patch14"
   batch_size:
     description: Batch size to use when embedding
-    type: int
\ No newline at end of file
+    type: int
+    default: 8
\ No newline at end of file
diff --git a/components/language_filter/fondant_component.yaml b/components/language_filter/fondant_component.yaml
index 82d321fda..d639a9a6e 100644
--- a/components/language_filter/fondant_component.yaml
+++ b/components/language_filter/fondant_component.yaml
@@ -11,4 +11,5 @@ consumes:
 args:
   language:
     description: A valid language code or identifier (e.g., "en", "fr", "de").
-    type: str
\ No newline at end of file
+    type: str
+    default: "en"
diff --git a/components/language_filter/src/main.py b/components/language_filter/src/main.py
index c2e3dcc02..540f40163 100644
--- a/components/language_filter/src/main.py
+++ b/components/language_filter/src/main.py
@@ -7,21 +7,22 @@
 logger = logging.getLogger(__name__)
 
+MODEL_PATH = "lid.176.ftz"
+
 
 class LanguageIdentification:
     """A class for language detection using FastText."""
 
-    def __init__(self, language, model_path: str = "lid.176.ftz"):
+    def __init__(self,
+                 language: str):
         """
         Initializes the LanguageDetect class.
 
         Args:
             language (str): language to filter on
-            model_path (str): The path to the FastText language identification model.
         """
-        pretrained_lang_model_weight_path = model_path
         self.language = language
-        self.model = fasttext.load_model(pretrained_lang_model_weight_path)
+        self.model = fasttext.load_model(MODEL_PATH)
 
     def predict_lang(self, text: str):
         """
@@ -52,7 +53,6 @@ def setup(self, *, language):
         """
         self.lang_detector = LanguageIdentification(language)
 
-
     def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
         """
         Args:
diff --git a/components/prompt_based_laion_retrieval/fondant_component.yaml b/components/prompt_based_laion_retrieval/fondant_component.yaml
index 5fa3bf331..544f7afc8 100644
--- a/components/prompt_based_laion_retrieval/fondant_component.yaml
+++ b/components/prompt_based_laion_retrieval/fondant_component.yaml
@@ -22,10 +22,13 @@ args:
   aesthetic_score:
     description: Aesthetic embedding to add to the query embedding, between 0 and 9 (higher is prettier).
     type: int
+    default: 9
   aesthetic_weight:
     description: Weight of the aesthetic embedding when added to the query, between 0 and 1
     type: float
+    default: 0.5
   url:
     description: The url of the backend clip retrieval service, defaults to the public service
     type: str
-    default: https://knn.laion.ai/knn-service
\ No newline at end of file
+    default: https://knn.laion.ai/knn-service
+
diff --git a/components/segment_images/fondant_component.yaml b/components/segment_images/fondant_component.yaml
index 87a36a1ba..f0f73a7f1 100644
--- a/components/segment_images/fondant_component.yaml
+++ b/components/segment_images/fondant_component.yaml
@@ -18,6 +18,8 @@ args:
   model_id:
     description: id of the model on the Hugging Face hub
     type: str
+    default: "openmmlab/upernet-convnext-small"
   batch_size:
     description: batch size to use
-    type: int
\ No newline at end of file
+    type: int
+    default: 8
\ No newline at end of file
diff --git a/components/segment_images/src/main.py b/components/segment_images/src/main.py
index c552cf028..434b6fb50 100644
--- a/components/segment_images/src/main.py
+++ b/components/segment_images/src/main.py
@@ -40,7 +40,10 @@ def convert_to_rgb(seg: np.array) -> bytes:
     return crop_bytes.getvalue()
 
 
-def process_image(image: bytes, *, processor: SegformerImageProcessor, device: str) -> torch.Tensor:
+def process_image(image: bytes,
+                  *,
+                  processor: SegformerImageProcessor,
+                  device: str) -> torch.Tensor:
     """
     Process the image to a tensor.
 
@@ -65,7 +68,9 @@ def transform(img: Image) -> BatchFeature:
 
 
 @torch.no_grad()
-def segment_image_batch(image_batch: pd.DataFrame, *, model: AutoModelForSemanticSegmentation,
+def segment_image_batch(image_batch: pd.DataFrame,
+                        *,
+                        model: AutoModelForSemanticSegmentation,
                         processor: SegformerImageProcessor) -> pd.Series:
     """Embed a batch of images."""
     input_batch = torch.cat(image_batch.tolist())
diff --git a/tests/example_pipelines/compiled_pipeline/example_1/docker-compose.yml b/tests/example_pipelines/compiled_pipeline/example_1/docker-compose.yml
index c9bf9b1a5..09da11c28 100644
--- a/tests/example_pipelines/compiled_pipeline/example_1/docker-compose.yml
+++ b/tests/example_pipelines/compiled_pipeline/example_1/docker-compose.yml
@@ -63,5 +63,4 @@ services:
       second_component:
         condition: service_completed_successfully
     volumes: []
-version: '3.8'
-
+version: '3.8'
\ No newline at end of file
diff --git a/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml b/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml
index c6ce8d047..9f561ddba 100644
--- a/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml
+++ b/tests/example_pipelines/compiled_pipeline/example_2/docker-compose.yml
@@ -34,8 +34,9 @@ services:
       "height": {"type": "int16"}}}}, "args": {"cropping_threshold": {"description":
       "Threshold parameter used for detecting borders. A lower (negative) parameter
       results in a more performant border detection, but can cause overcropping. Default
-      is -30", "type": "int"}, "padding": {"description": "Padding for the image cropping.
-      The padding is added to all borders of the image.", "type": "int"}}}'
+      is -30", "type": "int", "default": -30}, "padding": {"description": "Padding
+      for the image cropping. The padding is added to all borders of the image.",
+      "type": "int", "default": 10}}}'
     - --input_manifest_path
     - /foo/bar/first_component/manifest.json
     depends_on:
@@ -43,4 +44,4 @@ services:
         condition: service_completed_successfully
     image: ghcr.io/ml6team/image_cropping:dev
     volumes: []
-version: '3.8'
+version: '3.8'
\ No newline at end of file
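
Note on usage: the practical effect of the new `default` entries is that matching component arguments no longer need to be passed explicitly when a pipeline uses a component. The sketch below is illustrative only and is not the Fondant argument-resolution code; `resolve_arguments` and the merging behaviour it shows are assumptions about how a spec's `args` defaults can be combined with user-supplied values.

```python
# Hypothetical sketch: merge user-supplied arguments with the defaults declared
# in a fondant_component.yaml. This is NOT the actual Fondant implementation.
import yaml


def resolve_arguments(spec_path: str, user_args: dict) -> dict:
    """Return the arguments a component would run with, falling back to spec defaults."""
    with open(spec_path) as f:
        spec = yaml.safe_load(f)

    resolved = {}
    for name, definition in spec.get("args", {}).items():
        if name in user_args:
            resolved[name] = user_args[name]          # explicit value wins
        elif "default" in definition:
            resolved[name] = definition["default"]    # fall back to the declared default
        else:
            raise ValueError(f"Missing required argument: {name}")
    return resolved


# With the defaults added in this diff, only overrides need to be supplied:
args = resolve_arguments(
    "components/caption_images/fondant_component.yaml",
    {"batch_size": 16},  # model_id and max_new_tokens fall back to their defaults
)
```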