
Commit fe77c43: missing fixture

adrian-chang committed Mar 20, 2024
1 parent 3ca3781
Showing 3 changed files with 318 additions and 205 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/publish-test.yaml
@@ -96,13 +96,15 @@ jobs:
PYTEST_XDIST_AUTO_NUM_WORKERS: 32
LABELBOX_TEST_API_KEY: ${{ secrets[matrix.prod-key] }}
DA_GCP_LABELBOX_API_KEY: ${{ secrets[matrix.da-test-key] }}
+LABELBOX_TEST_ENVIRON: prod
run: |
rye run pytest libs/labelbox/tests/integration
- name: Data Testing
env:
PYTEST_XDIST_AUTO_NUM_WORKERS: 32
LABELBOX_TEST_API_KEY: ${{ secrets[matrix.prod-key] }}
DA_GCP_LABELBOX_API_KEY: ${{ secrets[matrix.da-test-key] }}
+LABELBOX_TEST_ENVIRON: prod
run: |
rye add labelbox --path ./$(find ./dist/ -name *.tar.gz) --sync --features labelbox/data
rye run pytest libs/labelbox/tests/data
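
Both test steps now export LABELBOX_TEST_ENVIRON: prod. As a rough sketch of how a test suite might consume this variable (the Environ enum and fixture below are illustrative assumptions, not necessarily this repo's actual code):

import os
from enum import Enum

import pytest


class Environ(Enum):
    PROD = "prod"
    STAGING = "staging"


@pytest.fixture(scope="session")
def environ() -> Environ:
    # Illustrative: fall back to prod, matching the value the workflow exports.
    return Environ(os.environ.get("LABELBOX_TEST_ENVIRON", "prod"))
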
315 changes: 315 additions & 0 deletions libs/labelbox/tests/conftest.py
@@ -9,6 +9,7 @@
import time
import requests
from types import SimpleNamespace
+from typing import Type
from enum import Enum
from typing import Tuple

@@ -665,3 +666,317 @@ def _setup_ontology(project):
# TODO: ontology may not be synchronous after setup. remove sleep when api is more consistent
time.sleep(2)
return OntologyBuilder.from_project(project)


@pytest.fixture
def big_dataset(dataset: Dataset):
task = dataset.create_data_rows([
{
"row_data": IMAGE_URL,
"external_id": EXTERNAL_ID
},
] * 3)
task.wait_till_done()

yield dataset

@pytest.fixture
def configured_batch_project_with_label(project, dataset, data_row,
wait_for_label_processing):
"""Project with a batch having one datarow
Project contains an ontology with 1 bbox tool
Additionally includes a create_label method for any needed extra labels
One label is already created and yielded when using fixture
"""
data_rows = [dr.uid for dr in list(dataset.data_rows())]
project._wait_until_data_rows_are_processed(data_row_ids=data_rows,
sleep_interval=3)
project.create_batch("test-batch", data_rows)
project.data_row_ids = data_rows

ontology = _setup_ontology(project)
label = _create_label(project, data_row, ontology,
wait_for_label_processing)

yield [project, dataset, data_row, label]

for label in project.labels():
label.delete()
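
# Illustrative usage sketch (not part of this commit): a test can unpack the
# fixture's yielded list; the test name and assertion here are assumptions.
def test_batch_project_has_label(configured_batch_project_with_label):
    project, dataset, data_row, label = configured_batch_project_with_label
    # The fixture has already created and processed one label.
    assert label.uid in {lbl.uid for lbl in project.labels()}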


@pytest.fixture
def configured_batch_project_with_multiple_datarows(project, dataset, data_rows,
wait_for_label_processing):
"""Project with a batch having multiple datarows
Project contains an ontology with 1 bbox tool
Additionally includes a create_label method for any needed extra labels
"""
global_keys = [dr.global_key for dr in data_rows]

batch_name = f'batch {uuid.uuid4()}'
project.create_batch(batch_name, global_keys=global_keys)

ontology = _setup_ontology(project)
for datarow in data_rows:
_create_label(project, datarow, ontology, wait_for_label_processing)

yield [project, dataset, data_rows]

for label in project.labels():
label.delete()

# NOTE: this is a useful heuristic; Project also has _wait_until_data_rows_are_processed,
# which we can fall back to if we still see flakiness in the future.
@pytest.fixture
def wait_for_data_row_processing():
"""
Do not use. Only for testing.
Returns DataRow after waiting for it to finish processing media_attributes.
Some tests, specifically ones that rely on label export, rely on
DataRow be fully processed with media_attributes
"""

def func(client, data_row, compare_with_prev_media_attrs=False):
"""
added check_updated_at because when a data_row is updated from say
an image to pdf, it already has media_attributes and the loop does
not wait for processing to a pdf
"""
prev_media_attrs = data_row.media_attributes if compare_with_prev_media_attrs else None
data_row_id = data_row.uid
timeout_seconds = 60
while True:
data_row = client.get_data_row(data_row_id)
if data_row.media_attributes and (prev_media_attrs is None or
prev_media_attrs
!= data_row.media_attributes):
return data_row
timeout_seconds -= 2
if timeout_seconds <= 0:
raise TimeoutError(
f"Timed out waiting for DataRow '{data_row_id}' to finish processing media_attributes"
)
time.sleep(2)

return func
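
# Illustrative usage sketch (not part of this commit): combining the fixture
# with a fresh upload; IMAGE_URL is the module-level constant used above.
def test_media_attributes_processed(client, dataset,
                                    wait_for_data_row_processing):
    data_row = dataset.create_data_row(row_data=IMAGE_URL)
    # Blocks until media_attributes are populated, or raises TimeoutError.
    data_row = wait_for_data_row_processing(client, data_row)
    assert data_row.media_attributes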


@pytest.fixture
def wait_for_label_processing():
"""
Do not use. Only for testing.
Returns project's labels as a list after waiting for them to finish processing.
If `project.labels()` is called before label is fully processed,
it may return an empty set
"""

def func(project):
timeout_seconds = 10
while True:
labels = list(project.labels())
if len(labels) > 0:
return labels
timeout_seconds -= 2
if timeout_seconds <= 0:
raise TimeoutError(
f"Timed out waiting for label for project '{project.uid}' to finish processing"
)
time.sleep(2)

return func

@pytest.fixture
def initial_dataset(client, rand_gen):
dataset = client.create_dataset(name=rand_gen(str))
yield dataset

dataset.delete()


@pytest.fixture
def video_data(client, rand_gen, video_data_row, wait_for_data_row_processing):
dataset = client.create_dataset(name=rand_gen(str))
data_row_ids = []
data_row = dataset.create_data_row(video_data_row)
data_row = wait_for_data_row_processing(client, data_row)
data_row_ids.append(data_row.uid)
yield dataset, data_row_ids
dataset.delete()


def create_video_data_row(rand_gen):
return {
"row_data":
"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-1.mp4",
"global_key":
f"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-1.mp4-{rand_gen(str)}",
"media_type":
"VIDEO",
}


@pytest.fixture
def video_data_100_rows(client, rand_gen, wait_for_data_row_processing):
dataset = client.create_dataset(name=rand_gen(str))
data_row_ids = []
for _ in range(100):
data_row = dataset.create_data_row(create_video_data_row(rand_gen))
data_row = wait_for_data_row_processing(client, data_row)
data_row_ids.append(data_row.uid)
yield dataset, data_row_ids
dataset.delete()


@pytest.fixture()
def video_data_row(rand_gen):
return create_video_data_row(rand_gen)


class ExportV2Helpers:

@classmethod
def run_project_export_v2_task(cls,
project,
num_retries=5,
task_name=None,
filters={},
params={}):
task = None
params = params if params else {
"project_details": True,
"performance_details": False,
"data_row_details": True,
"label_details": True
}
while (num_retries > 0):
task = project.export_v2(task_name=task_name,
filters=filters,
params=params)
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
if len(task.result) == 0:
num_retries -= 1
time.sleep(5)
else:
break
return task.result

@classmethod
def run_dataset_export_v2_task(cls,
dataset,
num_retries=5,
task_name=None,
filters={},
params={}):
task = None
params = params if params else {
"performance_details": False,
"label_details": True
}
while (num_retries > 0):
task = dataset.export_v2(task_name=task_name,
filters=filters,
params=params)
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
if len(task.result) == 0:
num_retries -= 1
time.sleep(5)
else:
break

return task.result

@classmethod
def run_catalog_export_v2_task(cls,
client,
num_retries=5,
task_name=None,
filters={},
params={}):
task = None
params = params if params else {
"performance_details": False,
"label_details": True
}
catalog = client.get_catalog()
while (num_retries > 0):
task = catalog.export_v2(task_name=task_name,
filters=filters,
params=params)
task.wait_till_done()
assert task.status == "COMPLETE"
assert task.errors is None
if len(task.result) == 0:
num_retries -= 1
time.sleep(5)
else:
break

return task.result


@pytest.fixture
def export_v2_test_helpers() -> ExportV2Helpers:
    return ExportV2Helpers()

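# Illustrative usage sketch (not part of this commit): the helper retries the
# export until the result is non-empty, so a test can assert on it directly.
def test_project_export_v2(configured_batch_project_with_label,
                           export_v2_test_helpers):
    project = configured_batch_project_with_label[0]
    result = export_v2_test_helpers.run_project_export_v2_task(project)
    assert len(result) > 0  # one element per exported data row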

@pytest.fixture
def big_dataset_data_row_ids(big_dataset: Dataset):
yield [dr.uid for dr in list(big_dataset.export_data_rows())]


@pytest.fixture(scope='function')
def dataset_with_invalid_data_rows(unique_dataset: Dataset,
upload_invalid_data_rows_for_dataset):
upload_invalid_data_rows_for_dataset(unique_dataset)

yield unique_dataset


@pytest.fixture
def upload_invalid_data_rows_for_dataset():

def _upload_invalid_data_rows_for_dataset(dataset: Dataset):
task = dataset.create_data_rows([
{
"row_data": 'gs://invalid-bucket/example.png', # forbidden
"external_id": "image-without-access.jpg"
},
] * 2)
task.wait_till_done()

return _upload_invalid_data_rows_for_dataset
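
# Illustrative usage sketch (not part of this commit): a test depending on
# dataset_with_invalid_data_rows gets two rows whose gs:// URL is unreadable;
# exactly how the SDK surfaces forbidden URLs may vary by version.
def test_dataset_with_invalid_rows(dataset_with_invalid_data_rows):
    rows = list(dataset_with_invalid_data_rows.data_rows())
    assert len(rows) <= 2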


@pytest.fixture
def configured_project(project_with_empty_ontology, initial_dataset, rand_gen,
image_url):
dataset = initial_dataset
data_row_id = dataset.create_data_row(row_data=image_url).uid
project = project_with_empty_ontology

    batch = project.create_batch(
        rand_gen(str),
        [data_row_id],  # sample of data row ids
        5  # priority between 1 (highest) and 5 (lowest)
    )
project.data_row_ids = [data_row_id]

yield project

batch.delete()
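
# Illustrative usage sketch (not part of this commit): configured_project
# yields a batch-mode project backed by an empty ontology.
def test_configured_project(configured_project):
    assert len(configured_project.data_row_ids) == 1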

@pytest.fixture
def project_with_empty_ontology(project):
editor = list(
project.client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0]
empty_ontology = {"tools": [], "classifications": []}
project.setup(editor, empty_ontology)
yield project