Commit

Merge pull request #2 from graphbookai/dev
Updating docs
rsamf authored Jan 1, 2025
2 parents e5d0d22 + e1d9664 commit 6616664
Showing 4 changed files with 61 additions and 0 deletions.
13 changes: 13 additions & 0 deletions graphbook_huggingface/hf_datasets.py
@@ -5,6 +5,19 @@


class HuggingfaceDataset(steps.GeneratorSourceStep):
"""
Loads a dataset from 🤗 Hugging Face and yields each dataset row as a Note.
The dataset is loaded using the `datasets.load_dataset` method.
If loading a dataset with images, you must specify the columns that contain images for them to be displayed in the UI.
Args:
dataset_id (str): The dataset ID from Huggingface
split (str): The split of the dataset to use
shuffle (bool): Whether to shuffle the dataset
log_data (bool): Whether to log the outputs as JSON to the node UI
image_columns (List[str]): The columns in the dataset that contain images. This is to let Graphbook know how to display the images in the UI.
kwargs (dict): Additional keyword arguments to pass to `datasets.load_dataset`, the Hugging Face method used to load the dataset
"""
RequiresInput = False
Parameters = {
"dataset_id": {
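As a rough sketch of what this node wraps, the call below shows how the `dataset_id`, `split`, and `shuffle` parameters map onto the Hugging Face `datasets` API; the dataset name and column names are placeholders, not taken from this repository.

```python
from datasets import load_dataset

ds = load_dataset("mnist", split="train")  # dataset_id, split
ds = ds.shuffle(seed=42)                   # applied when shuffle is True

for row in ds:
    # Each row would be yielded as a Note; columns named in image_columns
    # are the ones rendered as images in the Graphbook UI.
    print(list(row.keys()))
    break
```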
15 changes: 15 additions & 0 deletions graphbook_huggingface/hf_pipeline.py
@@ -9,6 +9,21 @@


class TransformersPipeline(steps.BatchStep):
"""
Loads and executes a model pipeline from 🤗 Hugging Face Transformers.
The model pipeline is loaded using the `transformers.pipeline` method.
Args:
model_id (str): The model ID from Huggingface
batch_size (int): The batch size for the pipeline
item_key (str): The key in the input item to use as input. Incoming values should be plain text strings
device_id (str): The device ID (e.g., "cuda:0", "cpu") to use
fp16 (bool): Whether to use fp16
log_model_outputs (bool): Whether to log the model outputs as JSON to the node UI
parallelize_preprocessing (bool): Whether to parallelize preprocessing by sending inputs to the worker pool
match_dtypes (bool): Whether to match the dtype of the input_values to the model's torch_dtype
kwargs (dict): Additional keyword arguments to pass to `transformers.pipeline`, the Hugging Face method used to load the pipeline
"""
RequiresInput = True
Parameters = {
"model_id": {
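A hedged sketch of the underlying `transformers.pipeline` call, showing how the node's parameters roughly map onto it; the task and model ID are placeholders, not taken from this repository.

```python
import torch
from transformers import pipeline

pipe = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",  # model_id (placeholder)
    device="cuda:0",                                          # device_id
    torch_dtype=torch.float16,                                # when fp16 is True
    batch_size=8,                                             # batch_size
)
# The string passed here stands in for the value pulled from item_key.
print(pipe("Graphbook makes building ML workflows easy."))
```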
32 changes: 32 additions & 0 deletions graphbook_huggingface/hf_postprocessing.py
@@ -5,6 +5,13 @@
from graphbook.utils import image

class MergeMasks(steps.Step):
"""
Merges multiple masks into a single mask by summing them and thresholding the result.
Args:
output_key (str): The key in the output item to use as output
delete_raw_output (bool): Whether to delete the raw Huggingface model output from the item
"""
RequiresInput = True
Parameters = {
"output_key": {
@@ -44,6 +51,12 @@ def on_note(self, note):
del note["model_output"]
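A minimal sketch of the merge-and-threshold idea MergeMasks describes, assuming each mask is a binary NumPy array of the same shape; the arrays here are synthetic stand-ins.

```python
import numpy as np

masks = [np.random.rand(8, 8) > 0.5 for _ in range(3)]  # stand-in binary masks
merged = (np.sum(masks, axis=0) > 0).astype(np.uint8)   # 1 wherever any mask is set
```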

class FilterMasks(steps.Step):
"""
Filters the masks based on the labels provided.
Args:
labels (List[str]): The labels to filter for
"""
RequiresInput = True
Parameters = {
"labels": {
@@ -65,6 +78,13 @@ def on_note(self, note):
note["model_output"] = filtered_output

class MaskOutputs(steps.Step):
"""
Parses the model output as masks and converts them to images for display in the Graphbook UI.
Args:
output_key (str): The key in the output item to use as output
delete_raw_output (bool): Whether to delete the raw Huggingface model output
"""
RequiresInput = True
Parameters = {
"output_key": {
@@ -99,6 +119,12 @@ def on_note(self, note):
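For the mask-to-image conversion MaskOutputs describes above, a minimal sketch assuming the mask is a binary NumPy array; the array is synthetic and the names are illustrative.

```python
import numpy as np
from PIL import Image

mask = (np.random.rand(64, 64) > 0.5).astype(np.uint8)  # stand-in binary mask
img = Image.fromarray(mask * 255, mode="L")              # white where the mask is set
```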


class DepthOutputs(steps.Step):
"""
Parses the model output as depth maps and converts them to images for display in the Graphbook UI.
Args:
output_key (str): The key in the output item to use as output
"""
RequiresInput = True
Parameters = {
"output_key": {
@@ -121,6 +147,12 @@ def on_note(self, note):
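For the depth-map display DepthOutputs describes above, a sketch of normalizing a predicted depth array into an 8-bit grayscale image; the depth values here are synthetic.

```python
import numpy as np
from PIL import Image

depth = np.random.rand(64, 64).astype(np.float32)        # stand-in predicted depth
norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
img = Image.fromarray((norm * 255).astype(np.uint8), mode="L")
```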


class ImageClassificationMaxLabel(steps.Step):
"""
Outputs the label with the maximum score from the model output.
Args:
delete_raw_hf_output (bool): Whether to delete the raw Huggingface model output
"""
RequiresInput = True
Parameters = {
"delete_raw_hf_output": {
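A sketch of picking the top label, assuming the usual Hugging Face image-classification output of `{"label", "score"}` dicts; the entries below are illustrative.

```python
model_output = [
    {"label": "tabby cat", "score": 0.87},
    {"label": "tiger cat", "score": 0.09},
]
top_label = max(model_output, key=lambda d: d["score"])["label"]  # "tabby cat"
```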
1 change: 1 addition & 0 deletions pyproject.toml
@@ -28,3 +28,4 @@ optional = true
[tool.poetry.group.peer.dependencies]
transformers = "^4.44.2"
datasets = "^2.21.0"
torch = "^2.0.0"
