-
Notifications
You must be signed in to change notification settings - Fork 390
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add dataset support to be created using distribution settings
- Loading branch information
Showing
10 changed files
with
224 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
...argilla_server/alembic/versions/45a12f74448b_add_distribution_column_to_datasets_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2021-present, the Recognai S.L. team. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""add distribution column to datasets table | ||
Revision ID: 45a12f74448b | ||
Revises: ca7293c38970 | ||
Create Date: 2024-06-13 11:23:43.395093 | ||
""" | ||
import json | ||
|
||
import sqlalchemy as sa | ||
from alembic import op | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "45a12f74448b" | ||
down_revision = "ca7293c38970" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
DISTRIBUTION_VALUE = json.dumps({"strategy": "overlap", "min_submitted": 1}) | ||
|
||
|
||
def upgrade() -> None:
    """Add a non-nullable JSON ``distribution`` column to ``datasets``.

    Three-step migration so that pre-existing rows satisfy the NOT NULL
    constraint:

    1. Add the column as nullable.
    2. Backfill every existing dataset with the default distribution
       (``{"strategy": "overlap", "min_submitted": 1}``).
    3. Tighten the column to NOT NULL using batch mode, which is required
       for SQLite's limited ALTER TABLE support.
    """
    op.add_column("datasets", sa.Column("distribution", sa.JSON(), nullable=True))
    # Use a bound parameter instead of f-string interpolation so the JSON
    # payload is never spliced into raw SQL (safe even if the constant ever
    # contains quotes or other SQL-significant characters).
    op.execute(
        sa.text("UPDATE datasets SET distribution = :distribution").bindparams(
            distribution=DISTRIBUTION_VALUE
        )
    )
    with op.batch_alter_table("datasets") as batch_op:
        batch_op.alter_column("distribution", nullable=False)
|
||
|
||
def downgrade() -> None:
    """Revert the migration by dropping the ``distribution`` column from ``datasets``."""
    op.drop_column("datasets", "distribution")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
119 changes: 119 additions & 0 deletions
119
argilla-server/tests/unit/api/handlers/v1/datasets/test_create_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Copyright 2021-present, the Recognai S.L. team. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pytest | ||
from argilla_server.enums import DatasetDistributionStrategy, DatasetStatus | ||
from argilla_server.models import Dataset | ||
from httpx import AsyncClient | ||
from sqlalchemy import func, select | ||
from sqlalchemy.ext.asyncio import AsyncSession | ||
|
||
from tests.factories import WorkspaceFactory | ||
|
||
|
||
@pytest.mark.asyncio
class TestCreateDataset:
    """API tests for ``POST /api/v1/datasets`` covering distribution settings.

    Exercises the default distribution applied when none is sent, an explicit
    overlap distribution, and validation rejection of an out-of-range
    ``min_submitted`` value.
    """

    def url(self) -> str:
        # Endpoint under test.
        return "/api/v1/datasets"

    async def test_create_dataset_with_default_distribution(
        self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict
    ):
        # When no distribution is provided, the server must apply the
        # default: overlap strategy with min_submitted=1.
        ws = await WorkspaceFactory.create()

        request_body = {
            "name": "Dataset Name",
            "workspace_id": str(ws.id),
        }
        response = await async_client.post(
            self.url(), headers=owner_auth_header, json=request_body
        )

        created = (await db.execute(select(Dataset))).scalar_one()

        assert response.status_code == 201

        expected_payload = {
            "id": str(created.id),
            "name": "Dataset Name",
            "guidelines": None,
            "allow_extra_metadata": True,
            "status": DatasetStatus.draft,
            "distribution": {
                "strategy": DatasetDistributionStrategy.overlap,
                "min_submitted": 1,
            },
            "workspace_id": str(ws.id),
            "last_activity_at": created.last_activity_at.isoformat(),
            "inserted_at": created.inserted_at.isoformat(),
            "updated_at": created.updated_at.isoformat(),
        }
        assert response.json() == expected_payload

    async def test_create_dataset_with_overlap_distribution(
        self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict
    ):
        # An explicit overlap distribution with a custom min_submitted
        # must be stored and echoed back as sent.
        ws = await WorkspaceFactory.create()

        request_body = {
            "name": "Dataset Name",
            "distribution": {
                "strategy": DatasetDistributionStrategy.overlap,
                "min_submitted": 4,
            },
            "workspace_id": str(ws.id),
        }
        response = await async_client.post(
            self.url(), headers=owner_auth_header, json=request_body
        )

        created = (await db.execute(select(Dataset))).scalar_one()

        assert response.status_code == 201

        expected_payload = {
            "id": str(created.id),
            "name": "Dataset Name",
            "guidelines": None,
            "allow_extra_metadata": True,
            "status": DatasetStatus.draft,
            "distribution": {
                "strategy": DatasetDistributionStrategy.overlap,
                "min_submitted": 4,
            },
            "workspace_id": str(ws.id),
            "last_activity_at": created.last_activity_at.isoformat(),
            "inserted_at": created.inserted_at.isoformat(),
            "updated_at": created.updated_at.isoformat(),
        }
        assert response.json() == expected_payload

    async def test_create_dataset_with_overlap_distribution_using_invalid_min_submitted_value(
        self, db: AsyncSession, async_client: AsyncClient, owner_auth_header: dict
    ):
        # min_submitted=0 is out of range: the request must be rejected
        # with 422 and no dataset row created.
        ws = await WorkspaceFactory.create()

        request_body = {
            "name": "Dataset name",
            "distribution": {
                "strategy": DatasetDistributionStrategy.overlap,
                "min_submitted": 0,
            },
            "workspace_id": str(ws.id),
        }
        response = await async_client.post(
            self.url(), headers=owner_auth_header, json=request_body
        )

        assert response.status_code == 422

        dataset_count = (await db.execute(select(func.count(Dataset.id)))).scalar_one()
        assert dataset_count == 0
Oops, something went wrong.