deepset-ai · shadeMe · Mar 4, 2024 · Mar 4, 2024
@@ -20,12 +20,30 @@
 @component
 class RagasEvaluator:
     """
-    A component that uses the Ragas framework to evaluate inputs against a specific metric.
-
-    The supported metrics are defined by `RagasMetric`.
-    Most of them require an OpenAI API key to be provided as an environment variable "OPENAI_API_KEY".
-    The inputs of the component are metric-dependent.
-    The output is a nested list of evaluation results where each inner list contains the results for a single input.
+    A component that uses the [Ragas framework](https://docs.ragas.io/) to evaluate
+    inputs against a specific metric. Supported metrics are defined by `RagasMetric`.
+
+    Usage example:
+    ```python
+    from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric
+
+    evaluator = RagasEvaluator(
+        metric=RagasMetric.CONTEXT_PRECISION,
+    )
+    output = evaluator.run(
+        questions=["Which is the most popular global sport?"],
+        contexts=[
+            [
+                "Football is undoubtedly the world's most popular sport with "
+                "major events like the FIFA World Cup and sports personalities "
+                "like Ronaldo and Messi, drawing a followership of more than 4 "
+                "billion people."
+            ]
+        ],
+        ground_truths=["Football is the most popular sport with around 4 billion followers worldwide"],
+    )
+    print(output["results"])
+    ```
     """
 
     # Wrapped for easy mocking.
@@ -44,6 +62,8 @@ def __init__(
             The metric to use for evaluation.
         :param metric_params:
             Parameters to pass to the metric's constructor.
+            Refer to the `RagasMetric` class for more details
+            on required parameters.
         """
         self.metric = metric if isinstance(metric, RagasMetric) else RagasMetric.from_str(metric)
         self.metric_params = metric_params or {}
@@ -56,9 +76,6 @@ def __init__(
         component.set_input_types(self, **expected_inputs)
 
     def _init_backend(self):
-        """
-        Initialize the Ragas backend and validate inputs.
-        """
         self._backend_callable = RagasEvaluator._invoke_evaluate
 
     def _init_metric(self):
@@ -74,29 +91,19 @@ def _invoke_evaluate(dataset: Dataset, metric: Metric) -> Result:
     @component.output_types(results=List[List[Dict[str, Any]]])
     def run(self, **inputs) -> Dict[str, Any]:
         """
-        Run the Ragas evaluator.
-
-        Example:
-        ```python
-        p = Pipeline()
-        evaluator = RagasEvaluator(
-            metric=RagasMetric.CONTEXT_PRECISION,
-        )
-        p.add_component("evaluator", evaluator)
-
-        results = p.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "ground_truths": GROUND_TRUTHS}})
-        ```
+        Run the Ragas evaluator on the provided inputs.
 
         :param inputs:
             The inputs to evaluate. These are determined by the
-            metric being calculated. See :class:`RagasMetric` for more
+            metric being calculated. See `RagasMetric` for more
             information.
         :returns:
-            A nested list of metric results. Each input can have one or more
+            A dictionary with a single `results` entry that contains
+            a nested list of metric results. Each input can have one or more
             results, depending on the metric. Each result is a dictionary
             containing the following keys and values:
-                * `name` - The name of the metric.
-                * `score` - The score of the metric.
+            - `name` - The name of the metric.
+            - `score` - The score of the metric.
         """
         InputConverters.validate_input_parameters(self.metric, self.descriptor.input_parameters, inputs)
         converted_inputs: List[Dict[str, str]] = list(self.descriptor.input_converter(**inputs))  # type: ignore
@@ -113,7 +120,12 @@ def run(self, **inputs) -> Dict[str, Any]:
 
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        :raises DeserializationError:
+            If the component cannot be serialized.
         """
 
         def check_serializable(obj: Any):
@@ -136,9 +148,11 @@ def check_serializable(obj: Any):
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "RagasEvaluator":
         """
-        Deserialize a component from a dictionary.
+        Deserializes the component from a dictionary.
 
         :param data:
-            The dictionary to deserialize from.
+            Dictionary to deserialize from.
+        :returns:
+            Deserialized component.
         """
         return default_from_dict(cls, data)
@@ -50,40 +50,44 @@ class RagasMetric(RagasBaseEnum):
     Metrics supported by Ragas.
     """
 
-    #: Answer correctness
-    #: Inputs - `questions: List[str], responses: List[str], ground_truths: List[str]`
+    #: Answer correctness.\
+    #: Inputs - `questions: List[str], responses: List[str], ground_truths: List[str]`\
+    #: Parameters - `weights: Tuple[float, float]`
     ANSWER_CORRECTNESS = "answer_correctness"
 
-    #: Faithfulness
+    #: Faithfulness.\
     #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
     FAITHFULNESS = "faithfulness"
 
-    #: Answer similarity
-    #: Inputs - `responses: List[str], ground_truths: List[str]`
+    #: Answer similarity.\
+    #: Inputs - `responses: List[str], ground_truths: List[str]`\
+    #: Parameters - `threshold: float`
     ANSWER_SIMILARITY = "answer_similarity"
 
-    #: Context precision
+    #: Context precision.\
     #: Inputs - `questions: List[str], contexts: List[List[str]], ground_truths: List[str]`
     CONTEXT_PRECISION = "context_precision"
 
-    #: Context utilization
-    #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
+    #: Context utilization.\
+    #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
     CONTEXT_UTILIZATION = "context_utilization"
 
-    #: Context recall
-    #: Inputs - `questions: List[str], contexts: List[List[str]], ground_truths: List[str]`
+    #: Context recall.\
+    #: Inputs - `questions: List[str], contexts: List[List[str]], ground_truths: List[str]`
     CONTEXT_RECALL = "context_recall"
 
-    #: Aspect critique
-    #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
+    #: Aspect critique.\
+    #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`\
+    #: Parameters - `name: str, definition: str, strictness: int`
     ASPECT_CRITIQUE = "aspect_critique"
 
-    #: Context relevancy
+    #: Context relevancy.\
     #: Inputs - `questions: List[str], contexts: List[List[str]]`
     CONTEXT_RELEVANCY = "context_relevancy"
 
-    #: Answer relevancy
-    #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
+    #: Answer relevancy.\
+    #: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`\
+    #: Parameters - `strictness: int`
     ANSWER_RELEVANCY = "answer_relevancy"