From 9da785f3a5225743e0f0d51543a10a4c1774bc6f Mon Sep 17 00:00:00 2001
From: Julian Steiner
Date: Tue, 11 Jun 2024 18:50:21 +0200
Subject: [PATCH] Various fixes

---
 CHANGELOG.md                                      |  3 ++-
 .../how_to_human_evaluation_via_argilla.ipynb     | 12 +++++++-----
 src/documentation/human_evaluation.ipynb          |  6 +++---
 .../connectors/argilla/argilla_client.py          |  6 +++---
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 30ab51d4c..c21c58bb9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,12 +7,13 @@
 - Renamed `ArgillaEvaluation` to `ArgillaRatingEvaluation`
 - Changed the logic of `ensure_dataset_exists` in `DefaultArgillaClient`: it now first checks whether the dataset exists and, if so, returns the `dataset_id` of the existing dataset; otherwise it creates the dataset.
 - Changed the API URL in `_list_workspaces` in `DefaultArgillaClient` to the v1 API URL.
+- Changed the Argilla image in `docker-compose.yml` from `argilla/argilla-server:v1.26.1` to `argilla/argilla-server:v1.29.0`
 
 ### New Features
 
 - Add `eot_token` property to `ControlModel` and derived classes (`LuminousControlModel`, `Llama2InstructModel` and `Llama3InstructModel`) and let `PromptBasedClassify` use this property instead of a hardcoded string.
 - Argilla type `TextQuestion` added for collecting natural language feedback or textual responses from labelers.
 - Argilla type `RatingQuestion` added for capturing numerical rating feedback.
-
+
 ### Fixes
 
 - Reinitializing different `AlephAlphaModel` instances and retrieving their tokenizer should now consume a lot less memory.
diff --git a/src/documentation/how_tos/how_to_human_evaluation_via_argilla.ipynb b/src/documentation/how_tos/how_to_human_evaluation_via_argilla.ipynb
index 89c7bbc2b..3ea9e51d0 100644
--- a/src/documentation/how_tos/how_to_human_evaluation_via_argilla.ipynb
+++ b/src/documentation/how_tos/how_to_human_evaluation_via_argilla.ipynb
@@ -11,10 +11,10 @@
     "from pydantic import BaseModel\n",
     "\n",
     "from intelligence_layer.connectors import (\n",
-    "    ArgillaEvaluation,\n",
+    "    ArgillaRatingEvaluation,\n",
     "    DefaultArgillaClient,\n",
     "    Field,\n",
-    "    Question,\n",
+    "    RatingQuestion,\n",
     "    RecordData,\n",
     ")\n",
     "from intelligence_layer.evaluation import (\n",
@@ -40,7 +40,7 @@
     "3. Create an `AsyncEvaluationRepository`\n",
     "4. Define a new output type for the evaluation\n",
     "5. Implement an `ArgillaEvaluationLogic`\n",
-    "    1. Create `Question`s and `Field`s to structure the data that will be displayed in Argilla\n",
+    "    1. Create `RatingQuestion`s and `Field`s to structure the data that will be displayed in Argilla\n",
     "    2. Implement `to_record` to convert the task input into an Argilla record\n",
     "    3. Implement `from_record` to convert the record back to an evaluation result\n",
     "6. Submit tasks to the Argilla instance by running the `ArgillaEvaluator`\n",
@@ -97,7 +97,7 @@
     "    def __init__(self):\n",
     "        super().__init__(\n",
     "            questions=[\n",
-    "                Question(\n",
+    "                RatingQuestion(\n",
     "                    name=\"rating\",\n",
     "                    title=\"Funniness\",\n",
     "                    description=\"How funny do you think the joke is? Rate it from 1-5.\",\n",
@@ -131,7 +131,9 @@
     "    )\n",
     "\n",
     "    # Step 5.3\n",
-    "    def from_record(self, argilla_evaluation: ArgillaEvaluation) -> FunnyOutputRating:\n",
+    "    def from_record(\n",
+    "        self, argilla_evaluation: ArgillaRatingEvaluation\n",
+    "    ) -> FunnyOutputRating:\n",
     "        return FunnyOutputRating(rating=argilla_evaluation.responses[\"rating\"])\n",
     "\n",
     "\n",
diff --git a/src/documentation/human_evaluation.ipynb b/src/documentation/human_evaluation.ipynb
index 0009e0aa1..974197155 100644
--- a/src/documentation/human_evaluation.ipynb
+++ b/src/documentation/human_evaluation.ipynb
@@ -19,7 +19,7 @@
     "    DefaultArgillaClient,\n",
     "    Field,\n",
     "    LimitedConcurrencyClient,\n",
-    "    Question,\n",
+    "    RatingQuestion,\n",
     "    RecordData,\n",
     ")\n",
     "from intelligence_layer.core import (\n",
@@ -278,13 +278,13 @@
    "outputs": [],
    "source": [
     "questions = [\n",
-    "    Question(\n",
+    "    RatingQuestion(\n",
     "        name=\"general_rating\",  # name of the field in the program, used for retrieval later\n",
     "        title=\"Rating\",  # name shown to the user\n",
     "        description=\"Rate this completion on a scale from 1 to 5\",\n",
     "        options=range(1, 6),\n",
     "    ),\n",
-    "    Question(\n",
+    "    RatingQuestion(\n",
     "        name=\"fluency\",\n",
     "        title=\"Fluency\",\n",
     "        description=\"How fluent is the completion?\",\n",
diff --git a/src/intelligence_layer/connectors/argilla/argilla_client.py b/src/intelligence_layer/connectors/argilla/argilla_client.py
index ec3cb15ea..3387636bd 100644
--- a/src/intelligence_layer/connectors/argilla/argilla_client.py
+++ b/src/intelligence_layer/connectors/argilla/argilla_client.py
@@ -69,7 +69,7 @@ class RatingQuestion(Question):
 
     @computed_field  # type: ignore[misc]
     @property
-    def settings(self) -> Mapping[Any, Any]:
+    def settings(self) -> Mapping[str, Any]:
         return {
             "type": "rating",
             "options": [{"value": option} for option in self.options],
@@ -87,7 +87,7 @@ class TextQuestion(Question):
 
     @computed_field  # type: ignore[misc]
     @property
-    def settings(self) -> Mapping[Any, Any]:
+    def settings(self) -> Mapping[str, Any]:
         return {"type": "text", "use_markdown": self.use_markdown}
 
 
@@ -570,7 +570,7 @@ def _create_question(
         name: str,
         title: str,
         description: str,
-        settings: Mapping[Any, Any],
+        settings: Mapping[str, Any],
         dataset_id: str,
     ) -> None:
         url = self.api_url + f"api/v1/datasets/{dataset_id}/questions"
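
Reviewer note (not part of the patch): a minimal sketch of how the two Argilla
question types touched by this change set are constructed, based only on what
is visible in the diffs above. It assumes `TextQuestion` is exported from
`intelligence_layer.connectors` alongside `RatingQuestion` (only the latter
appears in the notebook imports), and the "comments" question is hypothetical.

    from intelligence_layer.connectors import RatingQuestion, TextQuestion

    # Numerical rating question, mirroring the notebook diffs above.
    rating = RatingQuestion(
        name="rating",  # key used to look up the labeler's answer in `responses`
        title="Funniness",  # label shown to labelers in the Argilla UI
        description="How funny do you think the joke is? Rate it from 1-5.",
        options=range(1, 6),  # allowed rating values 1 through 5
    )

    # Free-text question; the `use_markdown` flag corresponds to the
    # settings payload shown in argilla_client.py.
    feedback = TextQuestion(
        name="comments",  # hypothetical question name
        title="Comments",
        description="Optional free-text feedback on the joke.",
        use_markdown=False,
    )

    # The computed `settings` properties now advertise Mapping[str, Any]:
    #   rating.settings   -> {"type": "rating", "options": [{"value": 1}, ..., {"value": 5}]}
    #   feedback.settings -> {"type": "text", "use_markdown": False}
    print(rating.settings)
    print(feedback.settings)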