Various fixes

Aleph-Alpha · Jun 12, 2024 · 9da785f
1 parent 12c5077
commit 9da785f
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 12 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,12 +7,13 @@
   - Renamed `ArgillaEvaluation` to `ArgillaRatingEvaluation`
   - Changed logic of `ensure_dataset_exists` in `DefaultArgillaClient`: it now first checks whether the dataset exists. If it does, the `dataset_id` of the existing dataset is returned; otherwise, the dataset is created.
   - Changed api url in `_list_workspaces` in `DefaultArgillaClient` to api v1 url.
+  - Changed argilla image in `docker-compose.yml` from `argilla/argilla-server:v1.26.1` to `argilla/argilla-server:v1.29.0`
 
 ### New Features
   - Add `eot_token` property to `ControlModel` and derived classes (`LuminousControlModel`, `Llama2InstructModel` and `Llama3InstructModel`) and let `PromptBasedClassify` use this property instead of a hardcoded string.
   - Argilla type `TextQuestion` added for collecting natural language feedback or textual responses from labelers.
   - Argilla type `RatingQuestion` added for capturing numerical rating feedback
-  
+
 ### Fixes
   - Reinitializing different `AlephAlphaModel` instances and retrieving their tokenizer should now consume a lot less memory.
 

diff --git a/src/documentation/how_tos/how_to_human_evaluation_via_argilla.ipynb b/src/documentation/how_tos/how_to_human_evaluation_via_argilla.ipynb
@@ -11,10 +11,10 @@
     "from pydantic import BaseModel\n",
     "\n",
     "from intelligence_layer.connectors import (\n",
-    "    ArgillaEvaluation,\n",
+    "    ArgillaRatingEvaluation,\n",
     "    DefaultArgillaClient,\n",
     "    Field,\n",
-    "    Question,\n",
+    "    RatingQuestion,\n",
     "    RecordData,\n",
     ")\n",
     "from intelligence_layer.evaluation import (\n",
@@ -40,7 +40,7 @@
     "3. Create an `AsyncEvaluationRepository`\n",
     "4. Define new output type for the evaluation\n",
     "5. Implement an `ArgillaEvaluationLogic`\n",
-    "   1. Create `Question`s and `Field`s to structure the data that will be displayed in Argilla\n",
+    "   1. Create `RatingQuestion`s and `Field`s to structure the data that will be displayed in Argilla\n",
     "   2. Implement `to_record` to convert the task input into an Argilla record\n",
     "   3. Implement `from_record` to convert the record back to an evaluation result\n",
     "6. Submit tasks to the Argilla instance by running the `ArgillaEvaluator`\n",
@@ -97,7 +97,7 @@
     "    def __init__(self):\n",
     "        super().__init__(\n",
     "            questions=[\n",
-    "                Question(\n",
+    "                RatingQuestion(\n",
     "                    name=\"rating\",\n",
     "                    title=\"Funniness\",\n",
     "                    description=\"How funny do you think is the joke? Rate it from 1-5.\",\n",
@@ -131,7 +131,9 @@
     "        )\n",
     "\n",
     "    # Step 5.3\n",
-    "    def from_record(self, argilla_evaluation: ArgillaEvaluation) -> FunnyOutputRating:\n",
+    "    def from_record(\n",
+    "        self, argilla_evaluation: ArgillaRatingEvaluation\n",
+    "    ) -> FunnyOutputRating:\n",
     "        return FunnyOutputRating(rating=argilla_evaluation.responses[\"rating\"])\n",
     "\n",
     "\n",

diff --git a/src/documentation/human_evaluation.ipynb b/src/documentation/human_evaluation.ipynb
@@ -19,7 +19,7 @@
     "    DefaultArgillaClient,\n",
     "    Field,\n",
     "    LimitedConcurrencyClient,\n",
-    "    Question,\n",
+    "    RatingQuestion,\n",
     "    RecordData,\n",
     ")\n",
     "from intelligence_layer.core import (\n",
@@ -278,13 +278,13 @@
    "outputs": [],
    "source": [
     "questions = [\n",
-    "    Question(\n",
+    "    RatingQuestion(\n",
     "        name=\"general_rating\",  # name of the field in program, used for retrieval later\n",
     "        title=\"Rating\",  # name shown to the user\n",
     "        description=\"Rate this completion on a scale from 1 to 5\",\n",
     "        options=range(1, 6),\n",
     "    ),\n",
-    "    Question(\n",
+    "    RatingQuestion(\n",
     "        name=\"fluency\",\n",
     "        title=\"Fluency\",\n",
     "        description=\"How fluent is the completion?\",\n",

diff --git a/src/intelligence_layer/connectors/argilla/argilla_client.py b/src/intelligence_layer/connectors/argilla/argilla_client.py
@@ -69,7 +69,7 @@ class RatingQuestion(Question):
 
     @computed_field  # type: ignore[misc]
     @property
-    def settings(self) -> Mapping[Any, Any]:
+    def settings(self) -> Mapping[str, Any]:
         return {
             "type": "rating",
             "options": [{"value": option} for option in self.options],
@@ -87,7 +87,7 @@ class TextQuestion(Question):
 
     @computed_field  # type: ignore[misc]
     @property
-    def settings(self) -> Mapping[Any, Any]:
+    def settings(self) -> Mapping[str, Any]:
         return {"type": "text", "use_markdown": self.use_markdown}
 
 
@@ -570,7 +570,7 @@ def _create_question(
         name: str,
         title: str,
         description: str,
-        settings: Mapping[Any, Any],
+        settings: Mapping[str, Any],
         dataset_id: str,
     ) -> None:
         url = self.api_url + f"api/v1/datasets/{dataset_id}/questions"