Commit

Merge pull request #2 from graphbookai/dev
Updating docs
rsamf authored Jan 1, 2025
2 parents e5d0d22 + e1d9664 commit 6616664
Showing 4 changed files with 61 additions and 0 deletions.
13 changes: 13 additions & 0 deletions graphbook_huggingface/hf_datasets.py
@@ -5,6 +5,19 @@


class HuggingfaceDataset(steps.GeneratorSourceStep):
"""
Loads a dataset from 🤗 Hugging Face and yields each dataset row as a Note.
The dataset is loaded using the `datasets.load_dataset` method.
If loading a dataset with images, you must specify the columns that contain images for them to be displayed in the UI.
Args:
dataset_id (str): The dataset ID from Huggingface
split (str): The split of the dataset to use
shuffle (bool): Whether to shuffle the dataset
log_data (bool): Whether to log the outputs as JSON to the node UI
image_columns (List[str]): The columns in the dataset that contain images. This is to let Graphbook know how to display the images in the UI.
kwargs (dict): Additional keyword arguments to pass to `datasets.load_dataset`, the Hugging Face method used to load the dataset
"""
RequiresInput = False
Parameters = {
"dataset_id": {
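As a rough sketch of what this node wraps, the call below shows how the `dataset_id`, `split`, and `shuffle` parameters map onto the Hugging Face `datasets` API; the dataset name and column names are placeholders, not taken from this repository.

```python
from datasets import load_dataset

ds = load_dataset("mnist", split="train")  # dataset_id, split
ds = ds.shuffle(seed=42)                   # applied when shuffle is True

for row in ds:
    # Each row would be yielded as a Note; columns named in image_columns
    # are the ones rendered as images in the Graphbook UI.
    print(list(row.keys()))
    break
```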
15 changes: 15 additions & 0 deletions graphbook_huggingface/hf_pipeline.py
@@ -9,6 +9,21 @@


class TransformersPipeline(steps.BatchStep):
"""
Loads and executes a model pipeline from 🤗 Hugging Face Transformers.
The model pipeline is loaded using the `transformers.pipeline` method.
Args:
model_id (str): The model ID from Huggingface
batch_size (int): The batch size for the pipeline
item_key (str): The key in the input item to use as input. Incoming values should be plain text strings
device_id (str): The device ID (e.g., "cuda:0", "cpu") to use
fp16 (bool): Whether to use fp16
log_model_outputs (bool): Whether to log the model outputs as JSON to the node UI
parallelize_preprocessing (bool): Whether to parallelize preprocessing by sending inputs to the worker pool
match_dtypes (bool): Whether to match the dtype of the input_values to the model's torch_dtype
kwargs (dict): Additional keyword arguments to pass to `transformers.pipeline`, the Hugging Face method used to load the pipeline
"""
RequiresInput = True
Parameters = {
"model_id": {
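A hedged sketch of the underlying `transformers.pipeline` call, showing how the node's parameters roughly map onto it; the task and model ID are placeholders, not taken from this repository.

```python
import torch
from transformers import pipeline

pipe = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",  # model_id (placeholder)
    device="cuda:0",                                          # device_id
    torch_dtype=torch.float16,                                # when fp16 is True
    batch_size=8,                                             # batch_size
)
# The string passed here stands in for the value pulled from item_key.
print(pipe("Graphbook makes building ML workflows easy."))
```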
32 changes: 32 additions & 0 deletions graphbook_huggingface/hf_postprocessing.py
@@ -5,6 +5,13 @@
from graphbook.utils import image

class MergeMasks(steps.Step):
"""
Merges multiple masks into a single mask by summing them and thresholding the result.
Args:
output_key (str): The key in the output item to use as output
delete_raw_output (bool): Whether to delete the raw Huggingface model output from the item
"""
RequiresInput = True
Parameters = {
"output_key": {
@@ -44,6 +51,12 @@ def on_note(self, note):
del note["model_output"]
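A minimal sketch of the merge-and-threshold idea MergeMasks describes, assuming each mask is a binary NumPy array of the same shape; the arrays here are synthetic stand-ins.

```python
import numpy as np

masks = [np.random.rand(8, 8) > 0.5 for _ in range(3)]  # stand-in binary masks
merged = (np.sum(masks, axis=0) > 0).astype(np.uint8)   # 1 wherever any mask is set
```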

class FilterMasks(steps.Step):
"""
Filters the masks based on the labels provided.
Args:
labels (List[str]): The labels to filter for
"""
RequiresInput = True
Parameters = {
"labels": {
@@ -65,6 +78,13 @@ def on_note(self, note):
note["model_output"] = filtered_output

class MaskOutputs(steps.Step):
"""
Parses the model output as masks and converts them to images for display in the Graphbook UI.
Args:
output_key (str): The key in the output item to use as output
delete_raw_output (bool): Whether to delete the raw Huggingface model output
"""
RequiresInput = True
Parameters = {
"output_key": {
@@ -99,6 +119,12 @@ def on_note(self, note):
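For the mask-to-image conversion MaskOutputs describes above, a minimal sketch assuming the mask is a binary NumPy array; the array is synthetic and the names are illustrative.

```python
import numpy as np
from PIL import Image

mask = (np.random.rand(64, 64) > 0.5).astype(np.uint8)  # stand-in binary mask
img = Image.fromarray(mask * 255, mode="L")              # white where the mask is set
```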


class DepthOutputs(steps.Step):
"""
Parses the model output as depth maps and converts them to images for display in the Graphbook UI.
Args:
output_key (str): The key in the output item to use as output
"""
RequiresInput = True
Parameters = {
"output_key": {
@@ -121,6 +147,12 @@ def on_note(self, note):
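For the depth-map display DepthOutputs describes above, a sketch of normalizing a predicted depth array into an 8-bit grayscale image; the depth values here are synthetic.

```python
import numpy as np
from PIL import Image

depth = np.random.rand(64, 64).astype(np.float32)        # stand-in predicted depth
norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
img = Image.fromarray((norm * 255).astype(np.uint8), mode="L")
```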


class ImageClassificationMaxLabel(steps.Step):
"""
Outputs the label with the maximum score from the model output.
Args:
delete_raw_hf_output (bool): Whether to delete the raw Huggingface model output
"""
RequiresInput = True
Parameters = {
"delete_raw_hf_output": {
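A sketch of picking the top label, assuming the usual Hugging Face image-classification output of `{"label", "score"}` dicts; the entries below are illustrative.

```python
model_output = [
    {"label": "tabby cat", "score": 0.87},
    {"label": "tiger cat", "score": 0.09},
]
top_label = max(model_output, key=lambda d: d["score"])["label"]  # "tabby cat"
```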
1 change: 1 addition & 0 deletions pyproject.toml
@@ -28,3 +28,4 @@ optional = true
[tool.poetry.group.peer.dependencies]
transformers = "^4.44.2"
datasets = "^2.21.0"
torch = "^2.0.0"
