From efc2c3b03ff507906a8e662ac8b6de367efab6a8 Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 9 Oct 2024 08:48:58 -0700
Subject: [PATCH 1/2] Better async error message

---
 python/langsmith/evaluation/_runner.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index a040ea7a..d076869c 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -253,6 +253,18 @@ def evaluate(
         ... )  # doctest: +ELLIPSIS
         View the evaluation results for experiment:...
     """  # noqa: E501
+    if callable(target) and rh.is_async(target):
+        raise ValueError(
+            "Async functions are not supported by `evaluate`. "
+            "Please use `aevaluate` instead:\n\n"
+            "from langsmith import aevaluate\n\n"
+            "await aevaluate(\n"
+            "    async_target_function,\n"
+            "    data=data,\n"
+            "    evaluators=evaluators,\n"
+            "    # ... other parameters\n"
+            ")"
+        )
     if experiment and experiment_prefix:
         raise ValueError(
             "Expected at most one of 'experiment' or 'experiment_prefix',"

From e3e11013e5e3e3d32749a8d108796f02021d7a1e Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 9 Oct 2024 14:31:37 -0700
Subject: [PATCH 2/2] Add tests

---
 .../unit_tests/evaluation/test_runner.py      | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 1229590c..5f2df6ec 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -1,6 +1,7 @@
 """Test the eval runner."""
 
 import asyncio
+import functools
 import itertools
 import json
 import random
@@ -248,6 +249,36 @@ def score_value(run, example):
     assert not fake_request.should_fail
 
 
+def test_evaluate_raises_for_async():
+    """Sync `evaluate` must reject async targets, bare or partial-wrapped."""
+    expected = "Async functions are not supported by"
+
+    async def coro_target(inputs: dict):
+        pass
+
+    async def coro_target_extra(inputs: dict, other_val: int):
+        pass
+
+    bound = functools.partial(coro_target_extra, other_val=3)
+    for target in (coro_target, bound):
+        with pytest.raises(ValueError, match=expected):
+            evaluate(target, data="foo")
+
+    try:
+        from langchain_core.runnables import RunnableLambda
+    except ImportError:
+        pytest.skip("langchain-core not installed.")
+
+    @RunnableLambda
+    def foo(inputs: dict):
+        return "bar"
+
+    bound_ainvoke = functools.partial(foo.ainvoke, inputs={"foo": "bar"})
+    for target in (foo.ainvoke, bound_ainvoke):
+        with pytest.raises(ValueError, match=expected):
+            evaluate(target, data="foo")
+
+
 @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
 @pytest.mark.parametrize("blocking", [False, True])
 async def test_aevaluate_results(blocking: bool) -> None: