Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: use bytes for dataset element contents #64

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions gptscript/datasets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import base64
from typing import Dict
from pydantic import BaseModel
from pydantic import BaseModel, field_serializer, field_validator, BeforeValidator


class DatasetElementMeta(BaseModel):
name: str
Expand All @@ -9,7 +11,17 @@ class DatasetElementMeta(BaseModel):
class DatasetElement(BaseModel):
    """A single element of a dataset.

    `contents` is held as raw bytes in memory, but is transported as a
    base64-encoded string over the JSON wire format: the serializer encodes
    on the way out, and the before-mode validator decodes on the way in, so
    callers always see `bytes`.
    """

    name: str
    description: str
    # Raw binary payload; base64-encoded only at the JSON boundary.
    contents: bytes

    @field_serializer("contents")
    def serialize_contents(self, value: bytes) -> str:
        """Encode the binary contents as a base64 string for JSON output."""
        return base64.b64encode(value).decode("utf-8")

    @field_validator("contents", mode="before")
    @classmethod
    def deserialize_contents(cls, value: object) -> bytes:
        """Accept either a base64 string (wire format) or raw bytes.

        mode="before" runs ahead of pydantic's own bytes coercion, so a
        base64 string arriving from JSON is decoded here; anything else is
        passed through for normal validation against `bytes`.
        """
        if isinstance(value, str):
            return base64.b64decode(value)
        return value


class DatasetMeta(BaseModel):
Expand Down
6 changes: 3 additions & 3 deletions gptscript/gptscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ async def create_dataset(self, workspace_id: str, name: str, description: str =
)
return Dataset.model_validate_json(res)

async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: str,
async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: bytes,
elementDescription: str = "") -> DatasetElementMeta:
if workspace_id == "":
workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
Expand All @@ -251,7 +251,7 @@ async def add_dataset_element(self, workspace_id: str, datasetID: str, elementNa
raise ValueError("datasetID cannot be empty")
elif elementName == "":
raise ValueError("elementName cannot be empty")
elif elementContent == "":
elif not elementContent:
raise ValueError("elementContent cannot be empty")

res = await self._run_basic_command(
Expand All @@ -260,7 +260,7 @@ async def add_dataset_element(self, workspace_id: str, datasetID: str, elementNa
"input": json.dumps({
"datasetID": datasetID,
"elementName": elementName,
"elementContent": elementContent,
"elementContent": base64.b64encode(elementContent).decode("utf-8"),
"elementDescription": elementDescription,
}),
"workspaceID": workspace_id,
Expand Down
12 changes: 6 additions & 6 deletions tests/test_gptscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,29 +771,29 @@ async def test_datasets(gptscript):
assert len(dataset.elements) == 0, "Expected dataset elements to be empty"

# Add an element
element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", "element1 contents",
element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", b"element1 contents",
"element1 description")
assert element_meta.name == "element1", "Expected element name to match"
assert element_meta.description == "element1 description", "Expected element description to match"

# Add two more elements
await gptscript.add_dataset_elements(workspace_id, dataset.id, [
DatasetElement(name="element2", contents="element2 contents", description="element2 description"),
DatasetElement(name="element3", contents="element3 contents", description="element3 description"),
DatasetElement(name="element2", contents=b"element2 contents", description="element2 description"),
DatasetElement(name="element3", contents=b"element3 contents", description="element3 description"),
])

# Get the elements
e1 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element1")
assert e1.name == "element1", "Expected element name to match"
assert e1.contents == "element1 contents", "Expected element contents to match"
assert e1.contents == b"element1 contents", "Expected element contents to match"
assert e1.description == "element1 description", "Expected element description to match"
e2 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element2")
assert e2.name == "element2", "Expected element name to match"
assert e2.contents == "element2 contents", "Expected element contents to match"
assert e2.contents == b"element2 contents", "Expected element contents to match"
assert e2.description == "element2 description", "Expected element description to match"
e3 = await gptscript.get_dataset_element(workspace_id, dataset.id, "element3")
assert e3.name == "element3", "Expected element name to match"
assert e3.contents == "element3 contents", "Expected element contents to match"
assert e3.contents == b"element3 contents", "Expected element contents to match"
assert e3.description == "element3 description", "Expected element description to match"

# List elements in the dataset
Expand Down