Skip to content

Commit

Permalink
Improve Text Generation Instructions and Tests (#690)
Browse files Browse the repository at this point in the history
Co-authored-by: b.nativi <[email protected]>
  • Loading branch information
bnativi and b.nativi authored Aug 6, 2024
1 parent 93b4bb0 commit f28acba
Show file tree
Hide file tree
Showing 24 changed files with 1,931 additions and 1,127 deletions.
201 changes: 123 additions & 78 deletions api/tests/functional-tests/backend/core/test_llm_clients.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def rag_data(
annotations=[
schemas.Annotation(
text=RAG_PREDICTIONS[i],
context=RAG_CONTEXT[i],
context_list=RAG_CONTEXT[i],
)
],
)
Expand Down Expand Up @@ -376,7 +376,7 @@ def two_text_generation_datasets(
annotations=[
schemas.Annotation(
text=RAG_PREDICTIONS[i],
context=RAG_CONTEXT[i],
context_list=RAG_CONTEXT[i],
)
],
)
Expand Down Expand Up @@ -540,40 +540,40 @@ def mocked_coherence(
def mocked_context_relevance(
    self,
    query: str,
    context_list: list[str],
):
    """Mocked LLM-client method returning a canned context-relevance score.

    Looks up the (query, context_list) pair in a fixed table so the
    functional tests get deterministic scores without calling a real LLM.
    (The scraped diff showed both the old ``context`` parameter and the new
    ``context_list`` one plus a duplicate return; only the post-commit
    version is kept here.)

    Raises
    ------
    KeyError
        If the (query, context_list) pair is not one of the canned cases.
    """
    ret_dict = {
        (RAG_QUERIES[0], tuple(RAG_CONTEXT[0])): 0.75,
        (RAG_QUERIES[1], tuple(RAG_CONTEXT[1])): 1.0,
        (RAG_QUERIES[2], tuple(RAG_CONTEXT[2])): 0.25,
    }
    # Tuples are used as keys because lists are unhashable.
    return ret_dict[(query, tuple(context_list))]


def mocked_faithfulness(
    self,
    text: str,
    context_list: list[str],
):
    """Mocked LLM-client method returning a canned faithfulness score.

    Looks up the (text, context_list) pair in a fixed table so the
    functional tests get deterministic scores without calling a real LLM.
    (The scraped diff showed both the old ``context`` parameter and the new
    ``context_list`` one plus a duplicate return; only the post-commit
    version is kept here.)

    Raises
    ------
    KeyError
        If the (text, context_list) pair is not one of the canned cases.
    """
    ret_dict = {
        (RAG_PREDICTIONS[0], tuple(RAG_CONTEXT[0])): 0.4,
        (RAG_PREDICTIONS[1], tuple(RAG_CONTEXT[1])): 0.55,
        (RAG_PREDICTIONS[2], tuple(RAG_CONTEXT[2])): 0.6666666666666666,
    }
    # Tuples are used as keys because lists are unhashable.
    return ret_dict[(text, tuple(context_list))]


def mocked_hallucination(
    self,
    text: str,
    context_list: list[str],
):
    """Mocked LLM-client method returning a canned hallucination score.

    Looks up the (text, context_list) pair in a fixed table so the
    functional tests get deterministic scores without calling a real LLM.
    (The scraped diff showed both the old ``context`` parameter and the new
    ``context_list`` one plus a duplicate return; only the post-commit
    version is kept here.)

    Raises
    ------
    KeyError
        If the (text, context_list) pair is not one of the canned cases.
    """
    ret_dict = {
        (RAG_PREDICTIONS[0], tuple(RAG_CONTEXT[0])): 0.0,
        (RAG_PREDICTIONS[1], tuple(RAG_CONTEXT[1])): 0.0,
        (RAG_PREDICTIONS[2], tuple(RAG_CONTEXT[2])): 0.25,
    }
    # Tuples are used as keys because lists are unhashable.
    return ret_dict[(text, tuple(context_list))]


def mocked_toxicity(
Expand Down
18 changes: 9 additions & 9 deletions api/tests/unit-tests/schemas/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ def test_ContextRelevanceMetric():
parameters={
"dataset_uid": "01",
"dataset_name": "test_dataset",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -651,7 +651,7 @@ def test_ContextRelevanceMetric():
parameters={
"dataset_uid": "01",
"dataset_name": "test_dataset",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -661,7 +661,7 @@ def test_ContextRelevanceMetric():
parameters={
"dataset_uid": "01",
"dataset_name": "test_dataset",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -686,7 +686,7 @@ def test_FaithfulnessMetric():
"dataset_uid": "01",
"dataset_name": "test_dataset",
"prediction": "some prediction",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -697,7 +697,7 @@ def test_FaithfulnessMetric():
"dataset_uid": "01",
"dataset_name": "test_dataset",
"prediction": "some prediction",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -708,7 +708,7 @@ def test_FaithfulnessMetric():
"dataset_uid": "01",
"dataset_name": "test_dataset",
"prediction": "some prediction",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -733,7 +733,7 @@ def test_HallucinationMetric():
"dataset_uid": "01",
"dataset_name": "test_dataset",
"prediction": "some prediction",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -744,7 +744,7 @@ def test_HallucinationMetric():
"dataset_uid": "01",
"dataset_name": "test_dataset",
"prediction": "some prediction",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand All @@ -755,7 +755,7 @@ def test_HallucinationMetric():
"dataset_uid": "01",
"dataset_name": "test_dataset",
"prediction": "some prediction",
"context": ["context1", "context2"],
"context_list": ["context1", "context2"],
},
)

Expand Down
14 changes: 3 additions & 11 deletions api/valor_api/backend/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,6 @@ def _create_embedding(
return row.id


def _format_context(
context: str | list[str] | None,
) -> list[str] | None:
if isinstance(context, str):
context = [context]
return context


def create_annotations(
db: Session,
annotations: list[list[schemas.Annotation]],
Expand Down Expand Up @@ -116,7 +108,7 @@ def create_annotations(
db=db, value=annotation.embedding
),
"text": annotation.text,
"context": _format_context(annotation.context),
"context_list": annotation.context_list,
"is_instance": annotation.is_instance,
"implied_task_types": annotation.implied_task_types,
}
Expand Down Expand Up @@ -176,7 +168,7 @@ def create_skipped_annotations(
raster=None,
embedding_id=None,
text=None,
context=None,
context_list=None,
is_instance=False,
implied_task_types=[TaskType.EMPTY],
)
Expand Down Expand Up @@ -283,7 +275,7 @@ def get_annotation(
raster=raster,
embedding=embedding,
text=annotation.text,
context=annotation.context,
context_list=annotation.context_list,
is_instance=annotation.is_instance,
implied_task_types=annotation.implied_task_types,
)
Expand Down
Loading

0 comments on commit f28acba

Please sign in to comment.