Skip to content

Commit

Permalink
Merge branch 'feat/updating-record' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
frascuchon committed Jun 19, 2024
2 parents 3c27034 + 1ab171c commit 0c3a247
Show file tree
Hide file tree
Showing 16 changed files with 180 additions and 179 deletions.
18 changes: 17 additions & 1 deletion argilla/docs/how_to_guides/record.md
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ updated_data = [
dataset.records.log(records=updated_data)
```

!!! note "Update the metadata"
=== "Update the metadata"
The `metadata` of a `Record` object is a Python dictionary. To update the metadata of a record, you can iterate over the records and update the metadata by key or using `metadata.update`. After that, you should update the records in the dataset.

```python
Expand All @@ -452,6 +452,22 @@ dataset.records.log(records=updated_data)
dataset.records.log(records=updated_records)
```

=== "Update vectors"
When a new vector field is added to the dataset settings, or when some values of the existing record vectors must be updated, you can iterate over the records and update the vectors in the same way as the metadata.

```python
updated_records = []

for record in dataset.records():

record.vectors["new_vector"] = [...]
record.vectors["v"] = [...]

updated_records.append(record)

dataset.records.log(records=updated_records)
```

## Delete records

You can delete records in a dataset by calling the `delete` method on the `Dataset` object. To delete records, you need to retrieve them from the server and get a list with those that you want to delete.
Expand Down
4 changes: 2 additions & 2 deletions argilla/src/argilla/_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from argilla._models._workspace import WorkspaceModel
from argilla._models._user import UserModel, Role
from argilla._models._dataset import DatasetModel
from argilla._models._record._record import RecordModel
from argilla._models._record._record import RecordModel, FieldValue
from argilla._models._record._suggestion import SuggestionModel
from argilla._models._record._response import UserResponseModel, ResponseStatus
from argilla._models._record._vector import VectorModel
from argilla._models._record._vector import VectorModel, VectorValue
from argilla._models._record._metadata import MetadataModel, MetadataValue
from argilla._models._search import (
SearchQueryModel,
Expand Down
9 changes: 6 additions & 3 deletions argilla/src/argilla/_models/_record/_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,20 @@

from pydantic import Field, field_serializer, field_validator

from argilla._models._resource import ResourceModel
from argilla._models._record._metadata import MetadataModel, MetadataValue
from argilla._models._record._response import UserResponseModel
from argilla._models._record._suggestion import SuggestionModel
from argilla._models._record._vector import VectorModel
from argilla._models._resource import ResourceModel

__all__ = ["RecordModel", "FieldValue"]

FieldValue = Union[str, None]

class RecordModel(ResourceModel):
"""Schema for the records of a `Dataset`"""

fields: Optional[Dict[str, Union[str, None]]] = None
fields: Optional[Dict[str, FieldValue]] = None
metadata: Optional[Union[List[MetadataModel], Dict[str, MetadataValue]]] = Field(default_factory=dict)
vectors: Optional[List[VectorModel]] = Field(default_factory=list)
responses: Optional[List[UserResponseModel]] = Field(default_factory=list)
Expand All @@ -49,7 +52,7 @@ def serialize_metadata(self, value: List[MetadataModel]) -> Dict[str, Any]:
return {metadata.name: metadata.value for metadata in value}

@field_serializer("fields", when_used="always")
def serialize_empty_fields(self, value: Dict[str, Union[str, None]]) -> Dict[str, Union[str, None]]:
def serialize_empty_fields(self, value: Dict[str, Union[str, None]]) -> Optional[Dict[str, Union[str, None]]]:
"""Serialize empty fields to None."""
if isinstance(value, dict) and len(value) == 0:
return None
Expand Down
10 changes: 6 additions & 4 deletions argilla/src/argilla/_models/_record/_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import List

from pydantic import field_validator

from argilla._models import ResourceModel

import re
from pydantic import field_validator
__all__ = ["VectorModel", "VectorValue"]

__all__ = ["VectorModel"]
VectorValue = List[float]


class VectorModel(ResourceModel):
name: str
vector_values: List[float]
vector_values: VectorValue

@field_validator("name")
@classmethod
Expand Down
18 changes: 10 additions & 8 deletions argilla/src/argilla/records/_dataset_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from argilla._api import RecordsAPI
from argilla._helpers import LoggingMixin
from argilla._models import RecordModel, MetadataValue
from argilla._models import RecordModel, MetadataValue, VectorValue, FieldValue
from argilla.client import Argilla
from argilla.records._io import GenericIO, HFDataset, HFDatasetsIO, JsonIO
from argilla.records._resource import Record
Expand Down Expand Up @@ -414,13 +414,15 @@ def _infer_record_from_mapping(
Returns:
A Record object.
"""
fields: Dict[str, str] = {}
responses: List[Response] = []
record_id: Optional[str] = None
suggestion_values = defaultdict(dict)
vectors: List[Vector] = []

fields: Dict[str, FieldValue] = {}
vectors: Dict[str, VectorValue] = {}
metadata: Dict[str, MetadataValue] = {}

responses: List[Response] = []
suggestion_values: Dict[str, dict] = defaultdict(dict)

schema = self.__dataset.schema

for attribute, value in data.items():
Expand Down Expand Up @@ -475,7 +477,7 @@ def _infer_record_from_mapping(
{"value": value, "question_name": attribute, "question_id": schema_item.id}
)
elif isinstance(schema_item, VectorField):
vectors.append(Vector(name=attribute, values=value))
vectors[attribute] = value
elif isinstance(schema_item, MetadataPropertyBase):
metadata[attribute] = value
else:
Expand All @@ -487,9 +489,9 @@ def _infer_record_from_mapping(
return Record(
id=record_id,
fields=fields,
suggestions=suggestions,
responses=responses,
vectors=vectors,
metadata=metadata,
suggestions=suggestions,
responses=responses,
_dataset=self.__dataset,
)
142 changes: 60 additions & 82 deletions argilla/src/argilla/records/_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
SuggestionModel,
VectorModel,
MetadataValue,
FieldValue,
VectorValue,
)
from argilla._resource import Resource
from argilla.responses import Response, UserResponse
Expand Down Expand Up @@ -54,9 +56,9 @@ class Record(Resource):
def __init__(
self,
id: Optional[Union[UUID, str]] = None,
fields: Optional[Dict[str, Union[str, None]]] = None,
fields: Optional[Dict[str, FieldValue]] = None,
metadata: Optional[Dict[str, MetadataValue]] = None,
vectors: Optional[List[Vector]] = None,
vectors: Optional[Dict[str, VectorValue]] = None,
responses: Optional[List[Response]] = None,
suggestions: Optional[List[Suggestion]] = None,
_server_id: Optional[UUID] = None,
Expand Down Expand Up @@ -93,7 +95,7 @@ def __init__(
# Initialize the fields
self.__fields = RecordFields(fields=self._model.fields)
# Initialize the vectors
self.__vectors = RecordVectors(vectors=vectors, record=self)
self.__vectors = RecordVectors(vectors=vectors)
# Initialize the metadata
self.__metadata = RecordMetadata(metadata=metadata)
self.__responses = RecordResponses(responses=responses, record=self)
Expand Down Expand Up @@ -158,8 +160,8 @@ def api_model(self) -> RecordModel:
id=self._model.id,
external_id=self._model.external_id,
fields=self.fields.to_dict(),
metadata=self.metadata.models,
vectors=self.vectors.models,
metadata=self.metadata.api_models(),
vectors=self.vectors.api_models(),
responses=self.responses.api_models(),
suggestions=self.suggestions.api_models(),
)
Expand All @@ -181,19 +183,22 @@ def to_dict(self) -> Dict[str, Dict]:
represented as a key-value pair in the dictionary of the respective key. i.e.
`{"fields": {"prompt": "...", "response": "..."}, "responses": {"rating": "..."},
"""
id = str(self.id) if self.id else None
server_id = str(self._model.id) if self._model.id else None
fields = self.fields.to_dict()
metadata = dict(self.metadata)
metadata = self.metadata.to_dict()
suggestions = self.suggestions.to_dict()
responses = self.responses.to_dict()
vectors = self.vectors.to_dict()

return {
"id": self.id,
"id": id,
"fields": fields,
"metadata": metadata,
"suggestions": suggestions,
"responses": responses,
"vectors": vectors,
"_server_id": str(self._model.id) if self._model.id else None,
"_server_id": server_id,
}

@classmethod
Expand All @@ -219,7 +224,6 @@ def from_dict(cls, data: Dict[str, Dict], dataset: Optional["Dataset"] = None) -
for question_name, _responses in responses.items()
for value in _responses
]
vectors = [Vector(name=vector_name, values=values) for vector_name, values in vectors.items()]

return cls(
id=record_id,
Expand All @@ -245,7 +249,7 @@ def from_model(cls, model: RecordModel, dataset: "Dataset") -> "Record":
id=model.external_id,
fields=model.fields,
metadata={meta.name: meta.value for meta in model.metadata},
vectors=[Vector.from_model(model=vector) for vector in model.vectors],
vectors={vector.name: vector.vector_values for vector in model.vectors},
# Responses and their models are not aligned 1-1.
responses=[
response
Expand All @@ -258,27 +262,44 @@ def from_model(cls, model: RecordModel, dataset: "Dataset") -> "Record":
)


class RecordFields:
class RecordFields(dict):
"""This is a container class for the fields of a Record.
It allows for accessing fields by attribute and iterating over them.
It allows for accessing fields by attribute and key name.
"""

def __init__(self, fields: Dict[str, Union[str, None]]) -> None:
self.__fields = fields or {}
for key, value in self.__fields.items():
setattr(self, key, value)
def __init__(self, fields: Optional[Dict[str, FieldValue]] = None) -> None:
super().__init__(fields or {})

def __getitem__(self, key: str) -> Optional[str]:
return self.__fields.get(key)
def to_dict(self) -> dict:
return dict(self.items())

def __iter__(self):
return iter(self.__fields)

def to_dict(self) -> Dict[str, Union[str, None]]:
return self.__fields
class RecordMetadata(dict):
"""This is a container class for the metadata of a Record."""

def __repr__(self) -> str:
return self.to_dict().__repr__()
def __init__(self, metadata: Optional[Dict[str, MetadataValue]] = None) -> None:
super().__init__(metadata or {})

def to_dict(self) -> dict:
return dict(self.items())

def api_models(self) -> List[MetadataModel]:
return [MetadataModel(name=key, value=value) for key, value in self.items()]


class RecordVectors(dict):
    """Container for the vectors of a Record.

    A plain ``dict`` subclass that maps each vector name to its list of
    values, so vectors are read and updated by key name, e.g.
    ``record.vectors["my_vector"] = [...]``.
    """

    def __init__(self, vectors: Optional[Dict[str, VectorValue]] = None) -> None:
        """Initialize the container.

        Args:
            vectors: Mapping of vector name to vector values. ``None`` (or an
                empty mapping) produces an empty container.
        """
        # `or {}` keeps `RecordVectors(None)` valid, which is what `Record`
        # passes when no vectors were provided.
        super().__init__(vectors or {})

    def to_dict(self) -> Dict[str, List[float]]:
        """Return a shallow copy of the vectors as a plain ``dict``."""
        return dict(self)

    def api_models(self) -> List[VectorModel]:
        """Return one API model per stored vector, built via ``Vector(...).api_model()``."""
        return [Vector(name=name, values=value).api_model() for name, value in self.items()]


class RecordResponses(Iterable[Response]):
Expand Down Expand Up @@ -309,6 +330,16 @@ def __getattr__(self, name) -> List[Response]:
def __repr__(self) -> str:
return {k: [{"value": v["value"]} for v in values] for k, values in self.to_dict().items()}.__repr__()

def to_dict(self) -> Dict[str, List[Dict]]:
"""Converts the responses to a dictionary.
Returns:
A dictionary of responses.
"""
response_dict = defaultdict(list)
for response in self.__responses:
response_dict[response.question_name].append({"value": response.value, "user_id": str(response.user_id)})
return response_dict

def api_models(self) -> List[UserResponseModel]:
"""Returns a list of ResponseModel objects."""

Expand All @@ -321,16 +352,6 @@ def api_models(self) -> List[UserResponseModel]:
for responses in responses_by_user_id.values()
]

def to_dict(self) -> Dict[str, List[Dict]]:
"""Converts the responses to a dictionary.
Returns:
A dictionary of responses.
"""
response_dict = defaultdict(list)
for response in self.__responses:
response_dict[response.question_name].append({"value": response.value, "user_id": response.user_id})
return response_dict


class RecordSuggestions(Iterable[Suggestion]):
"""This is a container class for the suggestions of a Record.
Expand All @@ -345,15 +366,15 @@ def __init__(self, suggestions: List[Suggestion], record: Record) -> None:
suggestion.record = self.record
setattr(self, suggestion.question_name, suggestion)

def api_models(self) -> List[SuggestionModel]:
return [suggestion.api_model() for suggestion in self.__suggestions]

def __iter__(self):
return iter(self.__suggestions)

def __getitem__(self, index: int):
return self.__suggestions[index]

def __repr__(self) -> str:
return self.to_dict().__repr__()

def to_dict(self) -> Dict[str, List[str]]:
"""Converts the suggestions to a dictionary.
Returns:
Expand All @@ -368,48 +389,5 @@ def to_dict(self) -> Dict[str, List[str]]:
}
return suggestion_dict

def __repr__(self) -> str:
return self.to_dict().__repr__()


class RecordVectors:
"""This is a container class for the vectors of a Record.
It allows for accessing suggestions by attribute and iterating over them.
"""

def __init__(self, vectors: List[Vector], record: Record) -> None:
self.__vectors = vectors or []
self.record = record
for vector in self.__vectors:
setattr(self, vector.name, vector.values)

def __repr__(self) -> str:
return {vector.name: f"{len(vector.values)}" for vector in self.__vectors}.__repr__()

@property
def models(self) -> List[VectorModel]:
return [vector.api_model() for vector in self.__vectors]

def to_dict(self) -> Dict[str, List[float]]:
"""Converts the vectors to a dictionary.
Returns:
A dictionary of vectors.
"""
return {vector.name: list(map(float, vector.values)) for vector in self.__vectors}


class RecordMetadata(dict):
"""This is a container class for the metadata of a Record."""

def __init__(self, metadata: Optional[Dict[str, MetadataValue]] = None) -> None:
super().__init__(metadata or {})

def __getattr__(self, item: str):
return self[item]

def __setattr__(self, key: str, value: MetadataValue):
self[key] = value

@property
def models(self) -> List[MetadataModel]:
return [MetadataModel(name=key, value=value) for key, value in self.items()]
def api_models(self) -> List[SuggestionModel]:
return [suggestion.api_model() for suggestion in self.__suggestions]
Loading

0 comments on commit 0c3a247

Please sign in to comment.