diff --git a/argilla-sdk/docs/how_to_guides/record.md b/argilla-sdk/docs/how_to_guides/record.md index 028ac2f4d8..9320045ae9 100644 --- a/argilla-sdk/docs/how_to_guides/record.md +++ b/argilla-sdk/docs/how_to_guides/record.md @@ -396,8 +396,6 @@ If your dataset includes some annotations, you can add those to the records as y dataset.records.log(data, user_id=user.id) ``` - - ## List records To list records in a dataset, you can use the `records` method on the `Dataset` object. This method returns a list of `Record` objects that can be iterated over to access the record properties. @@ -422,7 +420,7 @@ for record in dataset.records( ## Update records -You can update records in a dataset calling the `update` method on the `Dataset` object. To update a record, you need to provide the record `id` and the new data to be updated. +You can update records in a dataset by calling the `log` method on `dataset.records`. To update a record, you need to provide the record `id` and the new data to be updated. ```python data = dataset.records.to_list(flatten=True) @@ -436,8 +434,8 @@ updated_data = [ for sample in data ] dataset.records.log(records=updated_data) - ``` + !!! note "Update the metadata" The `metadata` of `Record` object is a python dictionary. So to update the metadata of a record, you can iterate over the records and update the metadata by key or using `metadata.update`. After that, you should update the records in the dataset. @@ -452,4 +450,27 @@ dataset.records.log(records=updated_data) updated_records.append(record) dataset.records.log(records=updated_records) - ``` \ No newline at end of file + ``` + +## Delete records + +You can delete records in a dataset by calling the `delete` method on `dataset.records`. To delete records, you need to retrieve them from the server and build a list of the ones that you want to delete. + +```python +records_to_delete = list(dataset.records)[:5] +dataset.records.delete(records=records_to_delete) +``` + +!!! 
tip "Delete records based on a query" + Deleting records based on a query can be very useful to avoid eliminating records that already have responses. + + > For more information about the query syntax, check this [how-to guide](query_export.md). + + ```python + status_filter = rg.Query( + filter=rg.Filter(("status", "==", "pending")) + ) + records_to_delete = list(dataset.records(status_filter)) + + dataset.records.delete(records_to_delete) + ``` diff --git a/argilla-sdk/src/argilla_sdk/_api/_records.py b/argilla-sdk/src/argilla_sdk/_api/_records.py index 5aa4198d4a..e7f9ac1f79 100644 --- a/argilla-sdk/src/argilla_sdk/_api/_records.py +++ b/argilla-sdk/src/argilla_sdk/_api/_records.py @@ -154,6 +154,14 @@ def update_many(self, dataset_id: UUID, records: List[RecordModel]) -> None: response.raise_for_status() self._log_message(message=f"Updated {len(records)} records in dataset {dataset_id}") + @api_error_handler + def delete_many(self, dataset_id: UUID, records: List[RecordModel]) -> None: + record_ids = [str(record.id) for record in records] + record_ids_str = ",".join(record_ids) + response = self.http_client.delete(url=f"/api/v1/datasets/{dataset_id}/records", params={"ids": record_ids_str}) + response.raise_for_status() + self._log_message(message=f"Deleted {len(records)} records in dataset {dataset_id}") + @api_error_handler def bulk_create( self, dataset_id: UUID, records: List[RecordModel] diff --git a/argilla-sdk/src/argilla_sdk/records/_dataset_records.py b/argilla-sdk/src/argilla_sdk/records/_dataset_records.py index e23fbd9cb3..11ecd88f10 100644 --- a/argilla-sdk/src/argilla_sdk/records/_dataset_records.py +++ b/argilla-sdk/src/argilla_sdk/records/_dataset_records.py @@ -239,6 +239,34 @@ def log( return created_or_updated + def delete( + self, + records: List[Record], + ) -> List[Record]: + """Delete records in a dataset on the server using the provided records + and matching based on the id. + + Parameters: + records: A list of `Record` objects representing the records to be deleted. 
+ + Returns: + The list of `Record` objects that was passed in, i.e. the records that were deleted. + + """ + mapping = None + user_id = self.__client.me.id + + record_models = self._ingest_records(records=records, mapping=mapping, user_id=user_id) + + self._api.delete_many(dataset_id=self.__dataset.id, records=record_models) + + self._log_message( + message=f"Deleted {len(record_models)} records from dataset {self.__dataset.name}", + level="info", + ) + + return records + def to_dict(self, flatten: bool = False, orient: str = "names") -> Dict[str, Any]: """ Return the records as a dictionary. This is a convenient shortcut for dataset.records(...).to_dict(). diff --git a/argilla-sdk/tests/integration/test_delete_records.py b/argilla-sdk/tests/integration/test_delete_records.py new file mode 100644 index 0000000000..7b4dd3a661 --- /dev/null +++ b/argilla-sdk/tests/integration/test_delete_records.py @@ -0,0 +1,102 @@ +# Copyright 2024-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import uuid +import pytest + +import argilla_sdk as rg + + +@pytest.fixture +def dataset(client: rg.Argilla) -> rg.Dataset: + workspace = client.workspaces[0] + mock_dataset_name = f"test_delete_records_{uuid.uuid1()}" + settings = rg.Settings( + allow_extra_metadata=True, + fields=[ + rg.TextField(name="text"), + ], + questions=[ + rg.TextQuestion(name="label", use_markdown=False), + ], + ) + dataset = rg.Dataset( + name=mock_dataset_name, + workspace=workspace.name, + settings=settings, + client=client, + ) + dataset.create() + return dataset + + +def test_delete_records(client: rg.Argilla, dataset: rg.Dataset): + mock_data = [ + { + "text": "Hello World, how are you?", + "label": "negative", + "id": uuid.uuid4(), + }, + { + "text": "Hello World, how are you?", + "label": "negative", + "id": uuid.uuid4(), + }, + { + "text": "Hello World, how are you?", + "label": "negative", + "id": uuid.uuid4(), + }, + ] + + dataset.records.log(records=mock_data) + records_to_delete = list(dataset.records)[:2] + dataset.records.delete(records_to_delete) + dataset_records = list(dataset.records) + + assert len(dataset_records) == 1 + assert dataset_records[0].id == str(mock_data[2]["id"]) + + for record in dataset_records: + assert record.id not in [deleted.id for deleted in records_to_delete] + + +def test_delete_single_record(client: rg.Argilla, dataset: rg.Dataset): + mock_data = [ + { + "text": "Hello World, how are you?", + "label": "negative", + "id": uuid.uuid4(), + }, + { + "text": "Hello World, how are you?", + "label": "negative", + "id": uuid.uuid4(), + }, + { + "text": "Hello World, how are you?", + "label": "negative", + "id": uuid.uuid4(), + }, + ] + + dataset.records.log(records=mock_data) + records_to_delete = [list(dataset.records)[1]] + dataset.records.delete(records_to_delete) + dataset_records = list(dataset.records) + + assert len(dataset_records) == 2 + assert dataset_records[0].id == str(mock_data[0]["id"]) + assert dataset_records[1].id == 
str(mock_data[2]["id"]) + assert str(mock_data[1]["id"]) not in [record.id for record in dataset_records]