Skip to content

Commit

Permalink
fix: get source code in jupyter notebook for benchmark logics
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesWesch committed Dec 16, 2024
1 parent 08331e1 commit 0c9a174
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 12 deletions.
25 changes: 18 additions & 7 deletions src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,32 @@
"\n",
"from documentation.how_tos.example_data import (\n",
" DummyAggregationLogic,\n",
" DummyEvaluationLogic,\n",
" DummyEvaluation,\n",
" DummyTask,\n",
" example_data,\n",
")\n",
"from intelligence_layer.connectors.studio.studio import StudioClient\n",
"from intelligence_layer.evaluation.benchmark.studio_benchmark import (\n",
"from intelligence_layer.connectors import StudioClient\n",
"from intelligence_layer.evaluation import (\n",
" EvaluationLogic,\n",
" Example,\n",
" StudioBenchmarkRepository,\n",
")\n",
"from intelligence_layer.evaluation.dataset.studio_dataset_repository import (\n",
" StudioDatasetRepository,\n",
" SuccessfulExampleOutput,\n",
")\n",
"\n",
"load_dotenv()\n",
"my_example_data = example_data()\n",
"examples = my_example_data.examples"
"examples = my_example_data.examples\n",
"\n",
"\n",
"class DummyEvaluationLogic(EvaluationLogic[str, str, str, DummyEvaluation]):\n",
" def do_evaluate(\n",
" self, example: Example[str, str], *output: SuccessfulExampleOutput[str]\n",
" ) -> DummyEvaluation:\n",
" output_str = \"(\" + (\", \".join(o.output for o in output)) + \")\"\n",
" return DummyEvaluation(\n",
" eval=f\"{example.input}, {example.expected_output}, {output_str} -> evaluation\"\n",
" )"
]
},
{
Expand Down Expand Up @@ -89,7 +100,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "intelligence-layer-LP3DLT23-py3.12",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
Expand Down
4 changes: 4 additions & 0 deletions src/intelligence_layer/evaluation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
from .aggregation.in_memory_aggregation_repository import (
InMemoryAggregationRepository as InMemoryAggregationRepository,
)
from .benchmark.studio_benchmark import StudioBenchmark as StudioBenchmark
from .benchmark.studio_benchmark import (
StudioBenchmarkRepository as StudioBenchmarkRepository,
)
from .dataset.dataset_repository import DatasetRepository as DatasetRepository
from .dataset.domain import Dataset as Dataset
from .dataset.domain import Example as Example
Expand Down
69 changes: 69 additions & 0 deletions src/intelligence_layer/evaluation/benchmark/get_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Utilities for working with IPython/Jupyter notebooks."""

import ast
import inspect
import textwrap

from intelligence_layer.evaluation.aggregation.aggregator import AggregationLogic
from intelligence_layer.evaluation.evaluation.evaluator.evaluator import EvaluationLogic


class NotInteractiveEnvironmentError(Exception):
    """Raised when notebook source is requested but no IPython shell is active."""


def is_running_interactively() -> bool:
    """Report whether an IPython shell is currently active.

    Returns:
        True if IPython can be imported and `get_ipython()` returns a shell
        instance; False if IPython is not installed or no shell is running.
    """
    try:
        from IPython import get_ipython

        active_shell = get_ipython()
    except ModuleNotFoundError:
        # IPython is not installed, so this cannot be an interactive session.
        return False
    return active_shell is not None


def get_notebook_source() -> str:
    """Return the input cells of the running notebook joined into one string.

    Returns:
        All non-empty entries of the shell's `In` history except the first
        one, separated by blank lines.

    Raises:
        NotInteractiveEnvironmentError: If no IPython shell is active.
        AttributeError: If the shell does not expose a `user_ns` attribute.
    """
    from IPython import get_ipython

    ipython_shell = get_ipython()
    if ipython_shell is None:
        raise NotInteractiveEnvironmentError

    if not hasattr(ipython_shell, "user_ns"):
        raise AttributeError("Cannot access user namespace")

    # `In` holds the notebook's input cells; the entry at index 0 is skipped.
    input_cells = ipython_shell.user_ns["In"]
    non_empty_cells = [cell for cell in input_cells[1:] if cell]

    # Stitch the cells back together so they read like a single source file.
    return "\n\n".join(non_empty_cells)


def get_class_source(cls: type) -> str:
    """Get the latest source definition of a class in the notebook.

    The notebook's input cells are parsed as one module and searched for
    class definitions whose name equals `cls.__name__`. If the class was
    defined several times (for example after re-running an edited cell), the
    definition that appears last in the stitched source is returned.

    Args:
        cls: The class whose source should be looked up by name.

    Returns:
        The source text of the matching class definition.

    Raises:
        ValueError: If no class with that name is found in the notebook.
    """
    notebook_source = get_notebook_source()
    tree = ast.parse(notebook_source)
    class_name = cls.__name__

    candidates = [
        node
        for node in ast.walk(tree)
        if isinstance(node, ast.ClassDef) and node.name == class_name
    ]
    if not candidates:
        raise ValueError(f"Class '{class_name}' not found in the notebook")

    # ast.walk yields nodes in no specified order, so "the last node yielded"
    # is not guaranteed to be the most recent version of the class. Select by
    # source position instead.
    latest = max(candidates, key=lambda node: (node.lineno, node.col_offset))
    segment = ast.get_source_segment(notebook_source, latest)
    if segment is None:
        raise ValueError(f"Class '{class_name}' not found in the notebook")
    return segment

def get_source_notebook_safe(logic: EvaluationLogic | AggregationLogic) -> str:
    """Return the dedented source code of the class of `logic`.

    `inspect.getsource` is tried first. If it raises `OSError` and an IPython
    shell is active, the class definition is looked up in the notebook's
    input cells instead.

    Args:
        logic: The evaluation or aggregation logic instance whose class
            source is wanted.

    Returns:
        The class source with common leading indentation removed.

    Raises:
        OSError: If `inspect.getsource` cannot retrieve the source and the
            code is not running in an interactive shell.
    """
    # In ipython, we can't use inspect.getsource on classes defined in the notebook
    logic_class = type(logic)
    try:
        src = inspect.getsource(logic_class)
    except OSError:
        if not is_running_interactively():
            # There is no notebook to fall back to. Re-raise the original
            # error instead of continuing with `src` unbound, which would
            # surface as an unrelated UnboundLocalError.
            raise
        src = get_class_source(logic_class)
    return textwrap.dedent(src)
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import inspect
from collections.abc import Sequence
from datetime import datetime
from http import HTTPStatus
Expand Down Expand Up @@ -29,6 +28,7 @@
Benchmark,
BenchmarkRepository,
)
from intelligence_layer.evaluation.benchmark.get_code import get_source_notebook_safe
from intelligence_layer.evaluation.dataset.domain import ExpectedOutput
from intelligence_layer.evaluation.dataset.studio_dataset_repository import (
StudioDatasetRepository,
Expand Down Expand Up @@ -269,7 +269,7 @@ def create_evaluation_logic_identifier(
evaluation_logic=eval_logic,
)
return EvaluationLogicIdentifier(
logic=inspect.getsource(type(eval_logic)),
logic=get_source_notebook_safe(eval_logic),
input_schema=type_to_schema(evaluator.input_type()),
output_schema=type_to_schema(evaluator.output_type()),
expected_output_schema=type_to_schema(evaluator.expected_output_type()),
Expand All @@ -287,7 +287,7 @@ def create_aggregation_logic_identifier(
aggregation_logic=aggregation_logic,
)
return AggregationLogicIdentifier(
logic=inspect.getsource(type(aggregation_logic)),
logic=get_source_notebook_safe(aggregation_logic),
evaluation_schema=type_to_schema(aggregator.evaluation_type()),
aggregation_schema=type_to_schema(aggregator.aggregated_evaluation_type()),
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
StudioExample,
)
from intelligence_layer.core import Input
from intelligence_layer.evaluation import (
from intelligence_layer.evaluation.dataset.dataset_repository import DatasetRepository
from intelligence_layer.evaluation.dataset.domain import (
Dataset,
DatasetRepository,
Example,
ExpectedOutput,
)
Expand Down

0 comments on commit 0c9a174

Please sign in to comment.