diff --git a/graphbook_huggingface/hf_datasets.py b/graphbook_huggingface/hf_datasets.py
index 1b0bf55..fb8c219 100644
--- a/graphbook_huggingface/hf_datasets.py
+++ b/graphbook_huggingface/hf_datasets.py
@@ -5,6 +5,19 @@
 
 
 class HuggingfaceDataset(steps.GeneratorSourceStep):
+    """
+    Loads a dataset from 🤗 Hugging Face and yields each dataset row as a Note.
+    The dataset is loaded using the `datasets.load_dataset` method.
+    If loading a dataset with images, you must specify the columns that contain images if you want them to be displayed in the UI.
+
+    Args:
+        dataset_id (str): The dataset ID from Hugging Face
+        split (str): The split of the dataset to use
+        shuffle (bool): Whether to shuffle the dataset
+        log_data (bool): Whether to log the outputs as JSON to the node UI
+        image_columns (List[str]): The columns in the dataset that contain images, so that Graphbook knows how to display them in the UI
+        kwargs (dict): Additional keyword arguments passed to `datasets.load_dataset`, which is used internally to load the dataset
+    """
     RequiresInput = False
     Parameters = {
         "dataset_id": {
diff --git a/graphbook_huggingface/hf_pipeline.py b/graphbook_huggingface/hf_pipeline.py
index c927504..a4b519b 100644
--- a/graphbook_huggingface/hf_pipeline.py
+++ b/graphbook_huggingface/hf_pipeline.py
@@ -9,6 +9,21 @@
 
 
 class TransformersPipeline(steps.BatchStep):
+    """
+    Loads and executes a model pipeline from 🤗 Hugging Face Transformers.
+    The model pipeline is loaded using the `transformers.pipeline` method.
+
+    Args:
+        model_id (str): The model ID from Hugging Face
+        batch_size (int): The batch size for the pipeline
+        item_key (str): The key in the input item to use as input. Incoming values should be strings (plain text)
+        device_id (str): The device ID (e.g., "cuda:0", "cpu") to use
+        fp16 (bool): Whether to use fp16 precision
+        log_model_outputs (bool): Whether to log the model outputs as JSON to the node UI
+        parallelize_preprocessing (bool): Whether to parallelize preprocessing by sending inputs to the worker pool
+        match_dtypes (bool): Whether to match the dtype of the `input_values` to the model's `torch_dtype`
+        kwargs (dict): Additional keyword arguments passed to `transformers.pipeline`, which is used internally to load the pipeline
+    """
     RequiresInput = True
     Parameters = {
         "model_id": {
diff --git a/graphbook_huggingface/hf_postprocessing.py b/graphbook_huggingface/hf_postprocessing.py
index bf11578..9d45e3d 100644
--- a/graphbook_huggingface/hf_postprocessing.py
+++ b/graphbook_huggingface/hf_postprocessing.py
@@ -5,6 +5,13 @@
 from graphbook.utils import image
 
 class MergeMasks(steps.Step):
+    """
+    Merges multiple masks into a single mask by summing them and thresholding the result.
+
+    Args:
+        output_key (str): The key in the output item to use as output
+        delete_raw_output (bool): Whether to delete the raw Hugging Face model output from the item
+    """
     RequiresInput = True
     Parameters = {
         "output_key": {
@@ -44,6 +51,12 @@ def on_note(self, note):
         del note["model_output"]
 
 class FilterMasks(steps.Step):
+    """
+    Filters the masks based on the provided labels.
+
+    Args:
+        labels (List[str]): The labels to filter for
+    """
     RequiresInput = True
     Parameters = {
         "labels": {
@@ -65,6 +78,13 @@ def on_note(self, note):
         note["model_output"] = filtered_output
 
 class MaskOutputs(steps.Step):
+    """
+    Parses the model output as masks and converts them to images for display in the Graphbook UI.
+
+    Args:
+        output_key (str): The key in the output item to use as output
+        delete_raw_output (bool): Whether to delete the raw Hugging Face model output
+    """
     RequiresInput = True
     Parameters = {
         "output_key": {
@@ -99,6 +119,12 @@ def on_note(self, note):
 
 
 class DepthOutputs(steps.Step):
+    """
+    Parses the model output as depth maps and converts them to images for display in the Graphbook UI.
+
+    Args:
+        output_key (str): The key in the output item to use as output
+    """
     RequiresInput = True
     Parameters = {
         "output_key": {
@@ -121,6 +147,12 @@ def on_note(self, note):
 
 
 class ImageClassificationMaxLabel(steps.Step):
+    """
+    Outputs the label with the maximum score from the model output.
+
+    Args:
+        delete_raw_hf_output (bool): Whether to delete the raw Hugging Face model output
+    """
     RequiresInput = True
     Parameters = {
         "delete_raw_hf_output": {
diff --git a/pyproject.toml b/pyproject.toml
index f33a37a..5099847 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,3 +28,4 @@ optional = true
 [tool.poetry.group.peer.dependencies]
 transformers = "^4.44.2"
 datasets = "^2.21.0"
+torch = "^2.0.0"
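
Notes for reviewers (not part of the patch). The two loader nodes are thin wrappers around the Hugging Face calls named in their docstrings, `datasets.load_dataset` and `transformers.pipeline`. The sketch below shows roughly how the new parameters map onto those calls; the dataset and model IDs are placeholders, the explicit task argument is an assumption (pipeline() can also infer it from the model), and this is an illustration of the parameter mapping rather than the node's actual implementation.

    # Hedged sketch: roughly what HuggingfaceDataset and TransformersPipeline wrap.
    # "beans" and "nateraw/vit-base-beans" are placeholder IDs, not part of this PR.
    import torch
    from datasets import load_dataset
    from transformers import pipeline

    # HuggingfaceDataset: dataset_id/split/shuffle map onto datasets.load_dataset
    ds = load_dataset("beans", split="train")
    ds = ds.shuffle()  # applied when shuffle=True

    # TransformersPipeline: model_id/device_id/fp16/batch_size map onto transformers.pipeline
    pipe = pipeline(
        task="image-classification",     # assumption; pipeline() can infer this from the model
        model="nateraw/vit-base-beans",  # model_id
        device="cuda:0",                 # device_id
        torch_dtype=torch.float16,       # fp16=True
        batch_size=8,                    # batch_size
    )

    for row in ds:
        outputs = pipe(row["image"])  # item_key selects which field is fed to the pipeline
        # ImageClassificationMaxLabel keeps only the top-scoring label from outputs like these:
        top_label = max(outputs, key=lambda o: o["score"])["label"]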
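
MergeMasks is documented as summing masks and thresholding the result. A minimal NumPy illustration of that idea, assuming same-shape float masks (this is not the code in hf_postprocessing.py):

    # Hedged sketch of the MergeMasks idea: sum per-mask scores, then binarize.
    import numpy as np

    def merge_masks(masks, threshold=0.5):
        stacked = np.stack(masks).astype(np.float32)  # shape (N, H, W)
        summed = stacked.sum(axis=0)                  # overlapping regions accumulate
        return summed > threshold                     # one binary mask

    merged = merge_masks([np.eye(4), np.fliplr(np.eye(4))])
    print(merged.astype(int))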
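
DepthOutputs converts depth maps into images the UI can display. One common way to do that is min-max normalization to 8-bit grayscale; the helper below is an assumption about the approach, not the patch's code:

    # Hedged sketch: normalize a raw depth map into a displayable image.
    import numpy as np
    from PIL import Image

    def depth_to_image(depth):
        lo, hi = float(depth.min()), float(depth.max())
        norm = (depth - lo) / (hi - lo + 1e-8)              # scale to [0, 1]
        return Image.fromarray((norm * 255).astype(np.uint8))  # 8-bit grayscale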