chore: update datasets API
Signed-off-by: Donnie Adams <[email protected]>
thedadams committed Oct 25, 2024
1 parent 94fe795 commit e64740b
Showing 2 changed files with 82 additions and 66 deletions.
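
At a glance: the dataset methods now address a workspace by ID rather than by directory path (the empty-string fallback reads GPTSCRIPT_WORKSPACE_ID instead of GPTSCRIPT_WORKSPACE_DIR), and every dataset call now forwards workspaceID and env to the dataset tool. A minimal sketch of the updated call pattern, assuming the client is constructed and closed the way the SDK's other examples do:

import asyncio

from gptscript.gptscript import GPTScript


async def main():
    g = GPTScript()  # construction and close() assumed from the SDK's usual usage
    try:
        # Workspaces are addressed by ID; create_workspace/delete_workspace
        # appear in the updated test further down.
        workspace_id = await g.create_workspace("directory")
        dataset = await g.create_dataset(workspace_id, "my-dataset", "a demo dataset")
        await g.add_dataset_element(workspace_id, dataset.id, "element1", "element1 contents")
        print(await g.list_dataset_elements(workspace_id, dataset.id))
        await g.delete_workspace(workspace_id)
    finally:
        g.close()


asyncio.run(main())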
77 changes: 46 additions & 31 deletions gptscript/gptscript.py
@@ -212,35 +212,39 @@ async def delete_credential(self, context: str = "default", name: str = "") -> s
             {"context": [context], "name": name}
         )
 
-    async def list_datasets(self, workspace: str) -> List[DatasetMeta]:
-        if workspace == "":
-            workspace = os.environ["GPTSCRIPT_WORKSPACE_DIR"]
+    async def list_datasets(self, workspace_id: str) -> List[DatasetMeta]:
+        if workspace_id == "":
+            workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
 
         res = await self._run_basic_command(
             "datasets",
-            {"input": "{}", "workspace": workspace, "datasetToolRepo": self.opts.DatasetToolRepo}
+            {"input": "{}", "workspaceID": workspace_id, "datasetToolRepo": self.opts.DatasetToolRepo,
+             "env": self.opts.Env}
         )
         return [DatasetMeta.model_validate(d) for d in json.loads(res)]
 
-    async def create_dataset(self, workspace: str, name: str, description: str = "") -> Dataset:
-        if workspace == "":
-            workspace = os.environ["GPTSCRIPT_WORKSPACE_DIR"]
+    async def create_dataset(self, workspace_id: str, name: str, description: str = "") -> Dataset:
+        if workspace_id == "":
+            workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
 
         if name == "":
             raise ValueError("name cannot be empty")
 
         res = await self._run_basic_command(
             "datasets/create",
-            {"input": json.dumps({"datasetName": name, "datasetDescription": description}),
-             "workspace": workspace,
-             "datasetToolRepo": self.opts.DatasetToolRepo}
+            {
+                "input": json.dumps({"datasetName": name, "datasetDescription": description}),
+                "workspaceID": workspace_id,
+                "datasetToolRepo": self.opts.DatasetToolRepo,
+                "env": self.opts.Env,
+            }
         )
         return Dataset.model_validate_json(res)
 
-    async def add_dataset_element(self, workspace: str, datasetID: str, elementName: str, elementContent: str,
+    async def add_dataset_element(self, workspace_id: str, datasetID: str, elementName: str, elementContent: str,
                                   elementDescription: str = "") -> DatasetElementMeta:
-        if workspace == "":
-            workspace = os.environ["GPTSCRIPT_WORKSPACE_DIR"]
+        if workspace_id == "":
+            workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
 
         if datasetID == "":
             raise ValueError("datasetID cannot be empty")
@@ -251,33 +255,41 @@ async def add_dataset_element(self, workspace: str, datasetID: str, elementName:
 
         res = await self._run_basic_command(
             "datasets/add-element",
-            {"input": json.dumps({"datasetID": datasetID,
-                                  "elementName": elementName,
-                                  "elementContent": elementContent,
-                                  "elementDescription": elementDescription}),
-             "workspace": workspace,
-             "datasetToolRepo": self.opts.DatasetToolRepo}
+            {
+                "input": json.dumps({
+                    "datasetID": datasetID,
+                    "elementName": elementName,
+                    "elementContent": elementContent,
+                    "elementDescription": elementDescription,
+                }),
+                "workspaceID": workspace_id,
+                "datasetToolRepo": self.opts.DatasetToolRepo,
+                "env": self.opts.Env
+            }
         )
         return DatasetElementMeta.model_validate_json(res)
 
-    async def list_dataset_elements(self, workspace: str, datasetID: str) -> List[DatasetElementMeta]:
-        if workspace == "":
-            workspace = os.environ["GPTSCRIPT_WORKSPACE_DIR"]
+    async def list_dataset_elements(self, workspace_id: str, datasetID: str) -> List[DatasetElementMeta]:
+        if workspace_id == "":
+            workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
 
         if datasetID == "":
             raise ValueError("datasetID cannot be empty")
 
         res = await self._run_basic_command(
             "datasets/list-elements",
-            {"input": json.dumps({"datasetID": datasetID}),
-             "workspace": workspace,
-             "datasetToolRepo": self.opts.DatasetToolRepo}
+            {
+                "input": json.dumps({"datasetID": datasetID}),
+                "workspaceID": workspace_id,
+                "datasetToolRepo": self.opts.DatasetToolRepo,
+                "env": self.opts.Env
+            }
         )
         return [DatasetElementMeta.model_validate(d) for d in json.loads(res)]
 
-    async def get_dataset_element(self, workspace: str, datasetID: str, elementName: str) -> DatasetElement:
-        if workspace == "":
-            workspace = os.environ["GPTSCRIPT_WORKSPACE_DIR"]
+    async def get_dataset_element(self, workspace_id: str, datasetID: str, elementName: str) -> DatasetElement:
+        if workspace_id == "":
+            workspace_id = os.environ["GPTSCRIPT_WORKSPACE_ID"]
 
         if datasetID == "":
             raise ValueError("datasetID cannot be empty")
@@ -286,9 +298,12 @@ async def get_dataset_element(self, workspace: str, datasetID: str, elementName:
 
         res = await self._run_basic_command(
             "datasets/get-element",
-            {"input": json.dumps({"datasetID": datasetID, "element": elementName}),
-             "workspace": workspace,
-             "datasetToolRepo": self.opts.DatasetToolRepo}
+            {
+                "input": json.dumps({"datasetID": datasetID, "element": elementName}),
+                "workspaceID": workspace_id,
+                "datasetToolRepo": self.opts.DatasetToolRepo,
+                "env": self.opts.Env,
+            }
         )
         return DatasetElement.model_validate_json(res)
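Every dataset subcommand now sends the same transport fields to the dataset tool. For reference, the body create_dataset hands to _run_basic_command is shaped roughly like this; the concrete values below are illustrative, and Env is assumed to be a list of KEY=value strings as elsewhere in the SDK:

import json

payload = {
    # Tool-specific arguments still travel as a JSON-encoded "input" string.
    "input": json.dumps({"datasetName": "my-dataset", "datasetDescription": "demo"}),
    "workspaceID": "example-workspace-id",  # new: workspace addressed by ID, not directory
    "datasetToolRepo": "",                  # self.opts.DatasetToolRepo
    "env": ["EXAMPLE_VAR=1"],               # new: self.opts.Env is forwarded
}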
71 changes: 36 additions & 35 deletions tests/test_gptscript.py
@@ -4,7 +4,6 @@
 import os
 import platform
 import subprocess
-import tempfile
 from datetime import datetime, timedelta, timezone
 from time import sleep
 
@@ -760,40 +759,42 @@ async def test_credentials(gptscript):
 
 @pytest.mark.asyncio
 async def test_datasets(gptscript):
-    with tempfile.TemporaryDirectory(prefix="py-gptscript_") as tempdir:
-        dataset_name = str(os.urandom(8).hex())
-
-        # Create dataset
-        dataset = await gptscript.create_dataset(tempdir, dataset_name, "this is a test dataset")
-        assert dataset.id != "", "Expected dataset id to be set"
-        assert dataset.name == dataset_name, "Expected dataset name to match"
-        assert dataset.description == "this is a test dataset", "Expected dataset description to match"
-        assert len(dataset.elements) == 0, "Expected dataset elements to be empty"
-
-        # Add an element
-        element_meta = await gptscript.add_dataset_element(tempdir, dataset.id, "element1", "element1 contents",
-                                                           "element1 description")
-        assert element_meta.name == "element1", "Expected element name to match"
-        assert element_meta.description == "element1 description", "Expected element description to match"
-
-        # Get the element
-        element = await gptscript.get_dataset_element(tempdir, dataset.id, "element1")
-        assert element.name == "element1", "Expected element name to match"
-        assert element.contents == "element1 contents", "Expected element contents to match"
-        assert element.description == "element1 description", "Expected element description to match"
-
-        # List elements in the dataset
-        elements = await gptscript.list_dataset_elements(tempdir, dataset.id)
-        assert len(elements) == 1, "Expected one element in the dataset"
-        assert elements[0].name == "element1", "Expected element name to match"
-        assert elements[0].description == "element1 description", "Expected element description to match"
-
-        # List datasets
-        datasets = await gptscript.list_datasets(tempdir)
-        assert len(datasets) > 0, "Expected at least one dataset"
-        assert datasets[0].id == dataset.id, "Expected dataset id to match"
-        assert datasets[0].name == dataset_name, "Expected dataset name to match"
-        assert datasets[0].description == "this is a test dataset", "Expected dataset description to match"
+    workspace_id = await gptscript.create_workspace("directory")
+    dataset_name = str(os.urandom(8).hex())
+
+    # Create dataset
+    dataset = await gptscript.create_dataset(workspace_id, dataset_name, "this is a test dataset")
+    assert dataset.id != "", "Expected dataset id to be set"
+    assert dataset.name == dataset_name, "Expected dataset name to match"
+    assert dataset.description == "this is a test dataset", "Expected dataset description to match"
+    assert len(dataset.elements) == 0, "Expected dataset elements to be empty"
+
+    # Add an element
+    element_meta = await gptscript.add_dataset_element(workspace_id, dataset.id, "element1", "element1 contents",
+                                                       "element1 description")
+    assert element_meta.name == "element1", "Expected element name to match"
+    assert element_meta.description == "element1 description", "Expected element description to match"
+
+    # Get the element
+    element = await gptscript.get_dataset_element(workspace_id, dataset.id, "element1")
+    assert element.name == "element1", "Expected element name to match"
+    assert element.contents == "element1 contents", "Expected element contents to match"
+    assert element.description == "element1 description", "Expected element description to match"
+
+    # List elements in the dataset
+    elements = await gptscript.list_dataset_elements(workspace_id, dataset.id)
+    assert len(elements) == 1, "Expected one element in the dataset"
+    assert elements[0].name == "element1", "Expected element name to match"
+    assert elements[0].description == "element1 description", "Expected element description to match"
+
+    # List datasets
+    datasets = await gptscript.list_datasets(workspace_id)
+    assert len(datasets) > 0, "Expected at least one dataset"
+    assert datasets[0].id == dataset.id, "Expected dataset id to match"
+    assert datasets[0].name == dataset_name, "Expected dataset name to match"
+    assert datasets[0].description == "this is a test dataset", "Expected dataset description to match"
+
+    await gptscript.delete_workspace(workspace_id)
 
 
 @pytest.mark.asyncio
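The updated test provisions a real workspace instead of pointing the dataset API at a temporary directory. A minimal sketch of that lifecycle, with a try/finally added here for illustration (the test itself deletes the workspace at the end without one):

workspace_id = await gptscript.create_workspace("directory")
try:
    datasets = await gptscript.list_datasets(workspace_id)
finally:
    await gptscript.delete_workspace(workspace_id)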
