Skip to content

Commit

Permalink
Merge pull request #272 from sotopia-lab/feature/multiparty
Browse files Browse the repository at this point in the history
feat: enabled saving and evaluation for moderator (#271)
  • Loading branch information
JXZhou0224 authored Jan 18, 2025
2 parents 3b9e842 + cb441c0 commit ea14fda
Show file tree
Hide file tree
Showing 20 changed files with 313 additions and 141 deletions.
4 changes: 2 additions & 2 deletions examples/experiment_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from sotopia.envs.evaluators import (
EvaluationForTwoAgents,
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
RuleBasedTerminatedEvaluator,
SotopiaDimensions,
)
Expand Down Expand Up @@ -164,7 +164,7 @@ def _iterate_env_agent_combo_not_in_db(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator(
EpisodeLLMEvaluator(
model_names["env"],
EvaluationForTwoAgents[evaluation_dimensions], # type: ignore
# TODO check how to do type annotation
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import logging
import sys
import json
from rich.logging import RichHandler

from aact import NodeFactory

from sotopia.experimental.agents.base_agent import BaseAgent
from sotopia.experimental.agents.datamodels import Observation, AgentAction
from sotopia.database.persistent_profile import AgentProfile
from typing import Any

from sotopia.generation_utils import agenerate
from sotopia.generation_utils.generate import StrOutputParser
Expand Down Expand Up @@ -33,11 +36,13 @@ def __init__(
input_channels: list[str],
output_channel: str,
query_interval: int,
agent_name: str,
node_name: str,
goal: str,
model_name: str,
redis_url: str,
goal: str,
agent_name: str = "",
background: dict[str, Any] | None = None,
agent_pk: str | None = None,
redis_url: str = "redis://localhost:6379/0",
):
super().__init__(
[(input_channel, Observation) for input_channel in input_channels],
Expand All @@ -47,23 +52,59 @@ def __init__(
)
self.output_channel = output_channel
self.query_interval = query_interval
self.count_ticks = 0
self.count_ticks: int = 0
self.message_history: list[Observation] = []
self.name = agent_name
self.model_name = model_name
self.goal = goal
self.goal: str = goal
self.model_name: str = model_name
self.agent_profile_pk: str | None = agent_pk
self.name: str = agent_name
self.background: dict[str, Any] | None = background
self.awake: bool = False

def set_profile(self, use_pk_value: bool) -> None:
    """Resolve this agent's ``AgentProfile`` and cache its pk, name and background.

    When ``use_pk_value`` is false, a new profile is built from ``self.name``
    (split on the first space into first and last name) together with the
    keyword arguments in ``self.background``, and then saved. When it is
    true, the existing profile is fetched with ``AgentProfile.get`` using
    ``self.agent_profile_pk``.

    In both cases ``self.agent_profile_pk``, ``self.name`` and
    ``self.background`` are overwritten from the resolved profile.

    Args:
        use_pk_value: Fetch an existing profile by pk instead of creating
            a new one from the name and background.

    Raises:
        ValueError: If the attributes required by the selected mode are
            missing (background/name when creating, pk when fetching).
    """
    if use_pk_value:
        # Fail early with a clear message instead of handing an unset pk
        # (None or the empty-string placeholder) to the database lookup.
        if not self.agent_profile_pk:
            raise ValueError("agent_pk must be provided when use_pk_value is true")
        profile = AgentProfile.get(pk=self.agent_profile_pk)
    else:
        # Explicit raise rather than `assert`: assertions are removed when
        # Python runs with -O, which would let a None background through.
        if self.background is None or self.name is None:
            raise ValueError("Background and name must be provided")
        # partition() yields ("name", "", "") when there is no space, so a
        # single-word name ends up with an empty last name.
        first_name, _, last_name = self.name.partition(" ")
        profile = AgentProfile(
            first_name=first_name, last_name=last_name, **self.background
        )
        profile.save()

    self.agent_profile_pk = profile.pk
    self.name = " ".join([profile.first_name, profile.last_name]).strip()
    self.background = profile.model_dump()

def _format_message_history(self, message_history: list[Observation]) -> str:
    """Render the given observations as a single newline-separated string.

    Each observation contributes the text returned by its
    ``to_natural_language()`` method, in the order given.
    """
    # TODO(akhatua): fix the mapping of actions so the text is grammatically correct
    rendered = [entry.to_natural_language() for entry in message_history]
    return "\n".join(rendered)

async def aact(self, obs: Observation) -> AgentAction:
if obs.turn_number == -1:
if self.awake:
return AgentAction(
agent_name=self.name,
output_channel=self.output_channel,
action_type="none",
argument="",
)
args = json.loads(obs.last_turn)
self.set_profile(args["use_pk_value"])
self.awake = True
return AgentAction(
agent_name=self.name,
output_channel=self.output_channel,
action_type="none",
argument=self.model_name,
argument=json.dumps(
{"pk": self.agent_profile_pk, "model_name": self.model_name}
),
)

self.message_history.append(obs)
Expand Down
21 changes: 18 additions & 3 deletions examples/experimental/sotopia_original_replica/origin.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
redis_url = "redis://localhost:6379/0"
extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator"]
extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator","sotopia.experimental.agents.evaluators"]


[[nodes]]
Expand All @@ -9,11 +9,13 @@ node_class = "moderator"
[nodes.node_args]
output_channels = ["moderator:Jane", "moderator:Jack"]
input_channels = ["Jane:moderator", "Jack:moderator"]
agent_backgrounds = {"Jane" = "", "Jack" = ""}
evaluator_channels = [["evaluator:moderator","moderator:evaluator"]]
agent_mapping = {"moderator:Jane" = "Jane", "moderator:Jack" = "Jack"}
scenario = "Two friends are sitting in a cafe and catching up with each other's lives."
max_turns = 2
max_turns = 3
push_to_db = false
evaluate_episode = true
use_pk_value = false

[[nodes]]
node_name = "Jack"
Expand All @@ -26,6 +28,8 @@ output_channel = "Jack:moderator"
goal = "Your goal is to borrow 5000 dollars from Jane."
model_name = "gpt-4o-mini"
agent_name = "Jack"
background = {"occupation" = "construction worker"}
agent_pk = ""


[[nodes]]
Expand All @@ -39,6 +43,8 @@ input_channels = ["moderator:Jane"]
goal = "Your goal is to help Jack however, you are in a finicial crisis yourself and can only afford to give him 500 dollars."
model_name = "gpt-4o-mini"
agent_name = "Jane"
background = {"occupation" = "gardener"}
agent_pk = ""

[[nodes]]
node_name = "chat_print"
Expand All @@ -50,3 +56,12 @@ node_class = "chat_print"

[nodes.node_args]
env_agents = ["Jack", "Jane"]

[[nodes]]
node_name = "evaluator"
node_class = "evaluator"

[nodes.node_args]
input_channels = ["moderator:evaluator"]
output_channels = ["evaluator:moderator"]
model_name = "gpt-4o-mini"
4 changes: 2 additions & 2 deletions examples/fix_missing_episodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from sotopia.envs.evaluators import (
EvaluationForTwoAgents,
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
RuleBasedTerminatedEvaluator,
SotopiaDimensions,
)
Expand Down Expand Up @@ -229,7 +229,7 @@ def yield_env_agent_combo(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator(
EpisodeLLMEvaluator(
model_names["env"],
EvaluationForTwoAgents[SotopiaDimensions],
),
Expand Down
4 changes: 2 additions & 2 deletions examples/fix_missing_episodes_with_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
)
from sotopia.envs.evaluators import (
EvaluationForTwoAgents,
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
RuleBasedTerminatedEvaluator,
SotopiaDimensions,
)
Expand Down Expand Up @@ -327,7 +327,7 @@ def yield_env_agent_combo(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator(
EpisodeLLMEvaluator(
model_names["env"],
EvaluationForTwoAgents[SotopiaDimensions],
),
Expand Down
4 changes: 2 additions & 2 deletions examples/use_custom_dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import Type, Union
from redis_om import Migrator
from sotopia.envs.evaluators import (
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
EvaluationForTwoAgents,
RuleBasedTerminatedEvaluator,
)
Expand Down Expand Up @@ -152,7 +152,7 @@ def run_simple_sample_with_custom_samples(
custom_dimensions, list_name="custom"
)
evaluator = RuleBasedTerminatedEvaluator(max_turn_number=10, max_stale_turn=2)
terminal_evaluator = ReachGoalLLMEvaluator(
terminal_evaluator = EpisodeLLMEvaluator(
model_name="gpt-4o-mini",
response_format_class=EvaluationForTwoAgents[custom_dimensions_type], # type: ignore
)
Expand Down
6 changes: 3 additions & 3 deletions sotopia-chat/chat_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
EnvironmentProfile,
)
from sotopia.envs.evaluators import (
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
RuleBasedTerminatedEvaluator,
)
from sotopia.envs.parallel import ParallelSotopiaEnv
Expand Down Expand Up @@ -64,7 +64,7 @@ async def _start_server_with_two_session_ids_and_agent_env_combo(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]),
EpisodeLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]),
],
)
random.shuffle(session_ids)
Expand Down Expand Up @@ -97,7 +97,7 @@ async def _start_server_with_one_session_id_and_agent_env_combo(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]),
EpisodeLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]),
],
)

Expand Down
4 changes: 2 additions & 2 deletions sotopia/api/fastapi_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sotopia.envs.parallel import ParallelSotopiaEnv
from sotopia.envs.evaluators import (
RuleBasedTerminatedEvaluator,
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
EvaluationForTwoAgents,
SotopiaDimensions,
)
Expand Down Expand Up @@ -267,7 +267,7 @@ async def nonstreaming_simulation(
),
],
"terminal_evaluators": [
ReachGoalLLMEvaluator(
EpisodeLLMEvaluator(
simulation_request.models[0],
EvaluationForTwoAgents[SotopiaDimensions],
),
Expand Down
4 changes: 2 additions & 2 deletions sotopia/api/websocket_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from sotopia.envs.evaluators import (
EvaluationForTwoAgents,
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
RuleBasedTerminatedEvaluator,
)
from sotopia.agents import Agents, LLMAgent
Expand Down Expand Up @@ -98,7 +98,7 @@ def get_env_agents(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator(
EpisodeLLMEvaluator(
evaluator_model,
EvaluationForTwoAgents[evaluation_dimensions], # type: ignore
),
Expand Down
4 changes: 2 additions & 2 deletions sotopia/cli/benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from sotopia.database.serialization import get_rewards_from_episode
from sotopia.envs.evaluators import (
EvaluationForTwoAgents,
ReachGoalLLMEvaluator,
EpisodeLLMEvaluator,
RuleBasedTerminatedEvaluator,
SotopiaDimensions,
)
Expand Down Expand Up @@ -363,7 +363,7 @@ def _list_all_env_agent_combo_not_in_db(
RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
],
terminal_evaluators=[
ReachGoalLLMEvaluator(
EpisodeLLMEvaluator(
model_names["env"],
EvaluationForTwoAgents[SotopiaDimensions],
),
Expand Down
2 changes: 1 addition & 1 deletion sotopia/database/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class BaseEpisodeLog(BaseModel):
tag: str | None = Field(index=True, default="")
models: list[str] | None = Field(index=True, default=[])
messages: list[list[tuple[str, str, str]]] # Messages arranged by turn
reasoning: str
reasoning: str = Field(default="")
rewards: list[tuple[float, dict[str, float]] | float] # Rewards arranged by turn
rewards_prompt: str

Expand Down
2 changes: 1 addition & 1 deletion sotopia/envs/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ async def __acall__(
return self(turn_number, messages)


class ReachGoalLLMEvaluator(Evaluator, Generic[T_eval_dim]):
class EpisodeLLMEvaluator(Evaluator, Generic[T_eval_dim]):
@beartype
def __init__(
self,
Expand Down
Loading

0 comments on commit ea14fda

Please sign in to comment.