Better async error message #1080

Merged 3 commits on Oct 10, 2024
12 changes: 12 additions & 0 deletions python/langsmith/evaluation/_runner.py
@@ -1,4 +1,4 @@
"""V2 Evaluation Interface."""

Check notice on line 1 in python/langsmith/evaluation/_runner.py (GitHub Actions / benchmark)

Benchmark results:

create_5_000_run_trees: Mean +- std dev: 581 ms +- 50 ms
create_10_000_run_trees: Mean +- std dev: 1.14 sec +- 0.06 sec
create_20_000_run_trees: Mean +- std dev: 1.14 sec +- 0.06 sec
dumps_class_nested_py_branch_and_leaf_200x400: Mean +- std dev: 764 us +- 11 us
dumps_class_nested_py_leaf_50x100: Mean +- std dev: 27.3 ms +- 0.7 ms
dumps_class_nested_py_leaf_100x200: Mean +- std dev: 112 ms +- 3 ms
dumps_dataclass_nested_50x100: Mean +- std dev: 27.4 ms +- 0.4 ms
dumps_pydantic_nested_50x100: Mean +- std dev: 58.8 ms +- 6.9 ms
dumps_pydanticv1_nested_50x100: Mean +- std dev: 215 ms +- 31 ms

WARNING: the dumps_pydantic_nested_50x100 (std dev is 12% of the mean) and dumps_pydanticv1_nested_50x100 (std dev is 14% of the mean) results may be unstable. Try rerunning the benchmark with more runs, values and/or loops, and run 'python -m pyperf system tune' to reduce system jitter. Use pyperf stats, pyperf dump and pyperf hist to analyze results; use --quiet to hide these warnings.

Check notice on line 1 in python/langsmith/evaluation/_runner.py (GitHub Actions / benchmark)

Comparison against main:

+-----------------------------------------------+----------+------------------------+
| Benchmark                                     | main     | changes                |
+===============================================+==========+========================+
| dumps_pydantic_nested_50x100                  | 63.7 ms  | 58.8 ms: 1.08x faster  |
+-----------------------------------------------+----------+------------------------+
| dumps_dataclass_nested_50x100                 | 27.9 ms  | 27.4 ms: 1.02x faster  |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_branch_and_leaf_200x400 | 762 us   | 764 us: 1.00x slower   |
+-----------------------------------------------+----------+------------------------+
| create_10_000_run_trees                       | 1.12 sec | 1.14 sec: 1.02x slower |
+-----------------------------------------------+----------+------------------------+
| create_20_000_run_trees                       | 1.12 sec | 1.14 sec: 1.02x slower |
+-----------------------------------------------+----------+------------------------+
| Geometric mean                                | (ref)    | 1.01x faster           |
+-----------------------------------------------+----------+------------------------+

Benchmark hidden because not significant (4): dumps_pydanticv1_nested_50x100, dumps_class_nested_py_leaf_100x200, dumps_class_nested_py_leaf_50x100, create_5_000_run_trees

from __future__ import annotations

@@ -253,6 +253,18 @@
        ... ) # doctest: +ELLIPSIS
        View the evaluation results for experiment:...
    """ # noqa: E501
    if callable(target) and rh.is_async(target):
        raise ValueError(
            "Async functions are not supported by `evaluate`. "
            "Please use `aevaluate` instead:\n\n"
            "from langsmith import aevaluate\n\n"
            "await aevaluate(\n"
            "    async_target_function,\n"
            "    data=data,\n"
            "    evaluators=evaluators,\n"
            "    # ... other parameters\n"
            ")"
        )
    if experiment and experiment_prefix:
        raise ValueError(
            "Expected at most one of 'experiment' or 'experiment_prefix',"
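For illustration, here is a minimal sketch of the switch that the new error message points users toward. The names used (`my_async_target`, the dataset string, the empty evaluator list) are hypothetical placeholders, not part of this PR:

import asyncio

from langsmith import aevaluate


async def my_async_target(inputs: dict) -> dict:
    # Hypothetical async target; passing this to `evaluate` now raises the
    # ValueError added above.
    return {"answer": str(inputs)}


async def main() -> None:
    # `aevaluate` is the async counterpart of `evaluate` and accepts async targets.
    await aevaluate(
        my_async_target,
        data="my-dataset",  # hypothetical dataset name
        evaluators=[],      # evaluators omitted for brevity
    )


asyncio.run(main())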
32 changes: 32 additions & 0 deletions python/tests/unit_tests/evaluation/test_runner.py
@@ -1,6 +1,7 @@
"""Test the eval runner."""

import asyncio
import functools
import itertools
import json
import random
@@ -248,6 +249,37 @@ def score_value(run, example):
    assert not fake_request.should_fail


def test_evaluate_raises_for_async():
    async def my_func(inputs: dict):
        pass

    match = "Async functions are not supported by"
    with pytest.raises(ValueError, match=match):
        evaluate(my_func, data="foo")

    async def my_other_func(inputs: dict, other_val: int):
        pass

    with pytest.raises(ValueError, match=match):
        evaluate(functools.partial(my_other_func, other_val=3), data="foo")

    try:
        from langchain_core.runnables import RunnableLambda
    except ImportError:
        pytest.skip("langchain-core not installed.")

    @RunnableLambda
    def foo(inputs: dict):
        return "bar"

    with pytest.raises(ValueError, match=match):
        evaluate(foo.ainvoke, data="foo")
    if sys.version_info < (3, 10):
        return
    with pytest.raises(ValueError, match=match):
        evaluate(functools.partial(foo.ainvoke, inputs={"foo": "bar"}), data="foo")


@pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
@pytest.mark.parametrize("blocking", [False, True])
async def test_aevaluate_results(blocking: bool) -> None:
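As background, here is a rough sketch of the kind of async-callable detection that the `callable(target) and rh.is_async(target)` guard relies on, covering the cases the tests exercise (a plain async def, a functools.partial wrapper, and an async method such as RunnableLambda.ainvoke). This is an illustrative approximation, not the actual `rh.is_async` implementation used by langsmith:

import functools
import inspect


def looks_async(func) -> bool:
    """Best-effort detection of async callables (illustrative sketch only)."""
    # Unwrap functools.partial so partial(async_fn, ...) is still recognized.
    while isinstance(func, functools.partial):
        func = func.func
    if inspect.iscoroutinefunction(func):
        return True
    # Fall back to objects whose __call__ is itself a coroutine function.
    call = getattr(func, "__call__", None)
    return call is not None and inspect.iscoroutinefunction(call)

Note that the test above only exercises the `functools.partial(foo.ainvoke, ...)` case on Python 3.10 or newer, presumably because detecting a coroutine function through that particular wrapping is version-dependent.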