From efc2c3b03ff507906a8e662ac8b6de367efab6a8 Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 9 Oct 2024 08:48:58 -0700
Subject: [PATCH 1/2] Better async error message

---
 python/langsmith/evaluation/_runner.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index a040ea7a..d076869c 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -253,6 +253,18 @@ def evaluate(
         ... ) # doctest: +ELLIPSIS
         View the evaluation results for experiment:...
     """ # noqa: E501
+    if callable(target) and rh.is_async(target):
+        raise ValueError(
+            "Async functions are not supported by `evaluate`. "
+            "Please use `aevaluate` instead:\n\n"
+            "from langsmith import aevaluate\n\n"
+            "await aevaluate(\n"
+            " async_target_function,\n"
+            " data=data,\n"
+            " evaluators=evaluators,\n"
+            " # ... other parameters\n"
+            ")"
+        )
     if experiment and experiment_prefix:
         raise ValueError(
             "Expected at most one of 'experiment' or 'experiment_prefix',"

From e3e11013e5e3e3d32749a8d108796f02021d7a1e Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 9 Oct 2024 14:31:37 -0700
Subject: [PATCH 2/2] Add tests

---
 .../unit_tests/evaluation/test_runner.py | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 1229590c..5f2df6ec 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -1,6 +1,7 @@
 """Test the eval runner."""
 
 import asyncio
+import functools
 import itertools
 import json
 import random
@@ -248,6 +249,36 @@ def score_value(run, example):
     assert not fake_request.should_fail
 
 
+def test_evaluate_raises_for_async():
+    async def my_func(inputs: dict):
+        pass
+
+    match = "Async functions are not supported by"
+    with pytest.raises(ValueError, match=match):
+        evaluate(my_func, data="foo")
+
+    async def my_other_func(inputs: dict, other_val: int):
+        pass
+
+    with pytest.raises(ValueError, match=match):
+        evaluate(functools.partial(my_other_func, other_val=3), data="foo")
+
+    try:
+        from langchain_core.runnables import RunnableLambda
+    except ImportError:
+        pytest.skip("langchain-core not installed.")
+
+    @RunnableLambda
+    def foo(inputs: dict):
+        return "bar"
+
+    with pytest.raises(ValueError, match=match):
+        evaluate(foo.ainvoke, data="foo")
+
+    with pytest.raises(ValueError, match=match):
+        evaluate(functools.partial(foo.ainvoke, inputs={"foo": "bar"}), data="foo")
+
+
 @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
 @pytest.mark.parametrize("blocking", [False, True])
 async def test_aevaluate_results(blocking: bool) -> None: