docs: Update uptrain-haystack docstrings #530

Merged · 1 commit · Mar 5, 2024
@@ -18,11 +18,33 @@
@component
class UpTrainEvaluator:
"""
A component that uses the UpTrain framework to evaluate inputs against a specific metric.

The supported metrics are defined by :class:`UpTrainMetric`. The inputs of the component
are metric-dependent. The output is a nested list of evaluation results where each inner list
contains the results for a single input.
A component that uses the [UpTrain framework](https://docs.uptrain.ai/getting-started/introduction)
to evaluate inputs against a specific metric. Supported metrics are defined by `UpTrainMetric`.

Usage example:
```python
from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric
from haystack.utils import Secret

evaluator = UpTrainEvaluator(
metric=UpTrainMetric.FACTUAL_ACCURACY,
api="openai",
api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
output = evaluator.run(
questions=["Which is the most popular global sport?"],
contexts=[
[
"Football is undoubtedly the world's most popular sport with"
"major events like the FIFA World Cup and sports personalities"
"like Ronaldo and Messi, drawing a followership of more than 4"
"billion people."
]
],
responses=["Football is the most popular sport with around 4 billion" "followers worldwide"],
)
print(output["results"])
```
"""

_backend_metric: Union[Evals, ParametricEval]
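
A side note on the usage example above: Python joins adjacent string literals at compile time with no implicit separator, so the trailing spaces inside the split-up context strings are significant. A minimal standalone illustration:

```python
# Adjacent string literals are concatenated with no implicit separator,
# which is why each line inside the quotes must end with a space.
joined = (
    "Football is the most popular sport with around 4 billion "
    "followers worldwide"
)
assert joined == "Football is the most popular sport with around 4 billion followers worldwide"
```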
@@ -44,15 +66,15 @@ def __init__(
The metric to use for evaluation.
:param metric_params:
Parameters to pass to the metric's constructor.
Refer to the `UpTrainMetric` class for more details
on required parameters.
:param api:
The API to use for evaluation.

Supported APIs: "openai", "uptrain".
The API to use for evaluation. Supported APIs:
`openai`, `uptrain`.
:param api_key:
The API key to use.
:param api_params:
Additional parameters to pass to the API client.

Required parameters for the UpTrain API: `project_name`.
"""
self.metric = metric if isinstance(metric, UpTrainMetric) else UpTrainMetric.from_str(metric)
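
As the assignment above shows, `metric` can be passed either as an `UpTrainMetric` member or as a plain string that is resolved through `UpTrainMetric.from_str`. A small sketch of the assumed equivalence (the exact `from_str`/`__str__` behavior is inferred from the enum values shown further below):

```python
from haystack_integrations.components.evaluators.uptrain import UpTrainMetric

# Assumed: from_str looks up a member by its string value, and __str__
# returns that value, so the two forms round-trip.
assert UpTrainMetric.from_str("factual_accuracy") is UpTrainMetric.FACTUAL_ACCURACY
assert str(UpTrainMetric.FACTUAL_ACCURACY) == "factual_accuracy"
```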
@@ -69,38 +91,20 @@ def __init__(
@component.output_types(results=List[List[Dict[str, Any]]])
def run(self, **inputs) -> Dict[str, Any]:
"""
Run the UpTrain evaluator.

Example:
```python
pipeline = Pipeline()
evaluator = UpTrainEvaluator(
metric=UpTrainMetric.FACTUAL_ACCURACY,
api="openai",
api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)

# Each metric expects a specific set of parameters as input. Refer to the
# UpTrainMetric class' documentation for more details.
output = pipeline.run({"evaluator": {
"questions": ["question],
"contexts": [["context", "another context"]],
"responses": ["response"]
}})
```
Run the UpTrain evaluator on the provided inputs.

:param inputs:
The inputs to evaluate. These are determined by the
metric being calculated. See `UpTrainMetric` for more
information.
:returns:
A nested list of metric results. Each input can have one or more
A dictionary with a single `results` entry that contains
a nested list of metric results. Each input can have one or more
results, depending on the metric. Each result is a dictionary
containing the following keys and values:
* `name` - The name of the metric.
* `score` - The score of the metric.
* `explanation` - An optional explanation of the score.
- `name` - The name of the metric.
- `score` - The score of the metric.
- `explanation` - An optional explanation of the score.
"""
# The backend requires random access to the data, so we can't stream it.
InputConverters.validate_input_parameters(self.metric, self.descriptor.input_parameters, inputs)
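
To make the documented output shape concrete, here is a sketch of iterating the nested `results` list, assuming `RESPONSE_RELEVANCE`, whose inputs (`questions`, `responses`) are listed in the metric enum below:

```python
from haystack.utils import Secret
from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric

evaluator = UpTrainEvaluator(
    metric=UpTrainMetric.RESPONSE_RELEVANCE,  # expects questions + responses
    api="openai",
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
output = evaluator.run(
    questions=["Which is the most popular global sport?"],
    responses=["Football, with roughly 4 billion followers."],
)
# `results` holds one inner list per evaluated input; each entry is a dict
# with `name`, `score`, and an optional `explanation`.
for input_idx, input_results in enumerate(output["results"]):
    for result in input_results:
        print(f"input {input_idx}: {result['name']} = {result['score']}")
        if result.get("explanation"):
            print(f"  explanation: {result['explanation']}")
```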
@@ -125,7 +129,12 @@ def run(self, **inputs) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Serializes the component to a dictionary.

:returns:
Dictionary with serialized data.
:raises DeserializationError:
If the component cannot be serialized.
"""

def check_serializable(obj: Any):
@@ -151,18 +160,17 @@ def check_serializable(obj: Any):
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "UpTrainEvaluator":
"""
Deserialize a component from a dictionary.
Deserializes the component from a dictionary.

:param data:
The dictionary to deserialize from.
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], ["api_key"])
return default_from_dict(cls, data)

def _init_backend(self):
"""
Initialize the UpTrain backend.
"""
if isinstance(self.descriptor.backend, Evals):
if self.metric_params is not None:
msg = (
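
Taken together, `to_dict` and `from_dict` should round-trip an evaluator, with `deserialize_secrets_inplace` restoring the `api_key` secret from its environment-variable reference rather than from a stored value. A sketch under that assumption:

```python
import os

from haystack.utils import Secret
from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric

os.environ["OPENAI_API_KEY"] = "sk-placeholder"  # illustrative value only

evaluator = UpTrainEvaluator(
    metric=UpTrainMetric.CONTEXT_RELEVANCE,
    api="openai",
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)

# The secret is serialized as an env-var reference, not its value, so the
# dictionary is safe to persist; from_dict resolves the reference again.
data = evaluator.to_dict()
restored = UpTrainEvaluator.from_dict(data)
assert restored.metric is evaluator.metric
```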
@@ -14,48 +14,51 @@ class UpTrainMetric(Enum):
Metrics supported by UpTrain.
"""

#: Context relevance.
#: Context relevance.\
#: Inputs - `questions: List[str], contexts: List[List[str]]`
CONTEXT_RELEVANCE = "context_relevance"

#: Factual accuracy.
#: Factual accuracy.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
FACTUAL_ACCURACY = "factual_accuracy"

#: Response relevance.
#: Response relevance.\
#: Inputs - `questions: List[str], responses: List[str]`
RESPONSE_RELEVANCE = "response_relevance"

#: Response completeness.
#: Response completeness.\
#: Inputs - `questions: List[str], responses: List[str]`
RESPONSE_COMPLETENESS = "response_completeness"

#: Response completeness with respect to context.
#: Response completeness with respect to context.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
RESPONSE_COMPLETENESS_WRT_CONTEXT = "response_completeness_wrt_context"

#: Response consistency.
#: Response consistency.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
RESPONSE_CONSISTENCY = "response_consistency"

#: Response conciseness.
#: Response conciseness.\
#: Inputs - `questions: List[str], responses: List[str]`
RESPONSE_CONCISENESS = "response_conciseness"

#: Language critique.
#: Language critique.\
#: Inputs - `responses: List[str]`
CRITIQUE_LANGUAGE = "critique_language"

#: Tone critique.
#: Inputs - `responses: List[str]`
#: Tone critique.\
#: Inputs - `responses: List[str]`\
#: Parameters - `llm_persona: str`
CRITIQUE_TONE = "critique_tone"

#: Guideline adherence.
#: Inputs - `questions: List[str], responses: List[str]`
#: Guideline adherence.\
#: Inputs - `questions: List[str], responses: List[str]`\
#: Parameters - `guideline: str`, `guideline_name: str`, `response_schema: Optional[str]`
GUIDELINE_ADHERENCE = "guideline_adherence"

#: Response matching.
#: Inputs - `responses: List[str], ground_truths: List[str]`
#: Response matching.\
#: Inputs - `responses: List[str], ground_truths: List[str]`\
#: Parameters - `method: str`
RESPONSE_MATCHING = "response_matching"

def __str__(self):
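
The metrics that list `Parameters` above are parametric: those parameters are supplied through the evaluator's `metric_params` constructor argument rather than through `run`. A minimal sketch using `CRITIQUE_TONE` (the persona string is illustrative only):

```python
from haystack.utils import Secret
from haystack_integrations.components.evaluators.uptrain import UpTrainEvaluator, UpTrainMetric

# CRITIQUE_TONE documents a single `llm_persona` parameter; parametric
# metrics receive their parameters via `metric_params`.
evaluator = UpTrainEvaluator(
    metric=UpTrainMetric.CRITIQUE_TONE,
    metric_params={"llm_persona": "methodical teacher"},
    api="openai",
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
output = evaluator.run(responses=["Water boils at 100 degrees Celsius at sea level."])
print(output["results"])
```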