diff --git a/examples/experimental/group_discussion_agents/group_discussion_agents.py b/examples/experimental/group_discussion_agents/group_discussion_agents.py index e4b3c0c2c..8ef55e5c5 100644 --- a/examples/experimental/group_discussion_agents/group_discussion_agents.py +++ b/examples/experimental/group_discussion_agents/group_discussion_agents.py @@ -2,7 +2,7 @@ from aact import Message, NodeFactory from aact.messages import Text, Tick, DataModel, DataModelFactory from sotopia.agents.llm_agent import ainput -from sotopia.experimental.agents import BaseAgent +from sotopia.experimental.agents.base_agent import BaseAgent from sotopia.generation_utils import agenerate from sotopia.generation_utils.generate import StrOutputParser diff --git a/examples/experimental/interview_openhands/interview_openhands.toml b/examples/experimental/interview_openhands/interview_openhands.toml new file mode 100644 index 000000000..2c29138eb --- /dev/null +++ b/examples/experimental/interview_openhands/interview_openhands.toml @@ -0,0 +1,96 @@ +redis_url = "redis://localhost:6379/0" +extra_modules = ["examples.experimental.interview_openhands.llm_agent", "examples.experimental.nodes.initial_message_node",] + + +[[nodes]] +node_name = "Jack" +node_class = "llm_agent" + +[nodes.node_args] +query_interval = 5 +output_channel = "Jack:Jane" +input_text_channels = ["Jane:Jack"] +input_env_channels = ["Runtime:Agent", "Scene:Jack"] +input_tick_channel = "tick/secs/1" +goal = "Your goal is to effectively test Jane's technical ability and finally decide if she has passed the interview. Make sure to also evaluate her communication skills, problem-solving approach, and enthusiasm." 
+model_name = "gpt-4o-mini" +agent_name = "Jack" + +[[nodes]] +node_name = "Jane" +node_class = "llm_agent" + +[nodes.node_args] +query_interval = 7 +output_channel = "Jane:Jack" +input_text_channels = ["Jack:Jane"] +input_env_channels = ["Runtime:Agent", "Scene:Jane"] +input_tick_channel = "tick/secs/1" +goal = "Your goal is to do well in the interview by demonstrating your technical skills, clear communication, and enthusiasm for the position. Stay calm, ask clarifying questions when needed, and confidently explain your thought process." +model_name = "gpt-4o-mini" +agent_name = "Jane" + +[[nodes]] +node_name = "tick" +node_class = "tick" + +[[nodes]] +node_name = "JaneScene" +node_class = "initial_message" + +[nodes.node_args] +input_tick_channel = "tick/secs/1" +output_channels = ["Scene:Jane"] +env_scenario = """ +You are Jane, a college senior at Stanford University interviewing for a Software Engineering Intern position at Fintech company. You are currently sitting in an office with your interviewer, Jack. +It's natural to feel a bit nervous, but remind yourself that you have prepared well. + +### Goals: +1. **Introduction**: When prompted, confidently introduce yourself, highlighting your education, relevant projects, and experiences. +2. **Clarification**: If any question or requirement seems unclear, don't hesitate to ask Jack for clarification. +3. **Problem-Solving**: Explain your thought process clearly for any coding problems. Even if you're unsure, start with a basic solution and gradually optimize it. +4. **Communication**: Be articulate in your explanations. Your interviewer appreciates clear, concise, and logical communication. +5. **Coding**: Write your code in a file in the /workspace directory. Make sure to justify each part of your solution. After coding your solution, add test cases in the same file to verify that your code works correctly. Explain how your test cases cover different scenarios and edge cases. +6. 
**Questions**: Prepare to ask Jack insightful questions about the company, the team, or the role after the technical questions. + +Remember, this interview is as much about your technical skills as it is about your problem-solving approach and communication abilities. +""" + +[[nodes]] +node_name = "JackScene" +node_class = "initial_message" + +[nodes.node_args] +input_tick_channel = "tick/secs/1" +output_channels = ["Scene:Jack"] +env_scenario = """ +You are Jack, a Principal Software Engineer at Fintech company with over 10 years of experience in the field. +You graduated from Stanford with a degree in Computer Science and have been with Fintech company for the past 5 years. +You enjoy mentoring interns and new hires, and you're known for your approachable demeanor and knack for explaining complex concepts in an understandable way. +Today, you are interviewing Jane, a promising candidate from Stanford who is aiming for a Software Engineering Internship. + +### Goals: +1. **Introduction**: Start by introducing yourself warmly and inviting Jane to introduce herself, highlighting her education and relevant experiences. +2. **Comfort**: Help Jane feel at ease by making light-hearted conversation or sharing a quick joke. +3. **Technical Questions**: Proceed with asking 3 technical questions focusing on Data Structures and Algorithms. Make sure to: + - Clearly specify the problem statement. + - Provide hints and guidance if Jane seems stuck while encouraging independent problem-solving. +4. **Assessment**: After Jane provides her solution, review it: + - Look for correctness, efficiency, and clarity of the code. + - Ask Jane to explain her solution and discuss any optimizations. + - Run test cases and provide feedback. +5. **Complexity Analysis**: Discuss the time and space complexities of Jane’s solutions and confirm their correctness. +6. **Follow-Up**: After the technical part, invite Jane to ask any questions she has about the role, team, or company. +7. 
class ActionType(Enum):
    """Closed set of action kinds an agent can emit on a turn.

    Members compare equal both to other members carrying the same value and
    to the plain string holding that value, so ``ActionType.SPEAK == "speak"``
    is true. Hashing follows the same rule, which keeps members usable as
    dictionary keys and set elements.
    """

    NONE = "none"
    SPEAK = "speak"
    NON_VERBAL = "non-verbal"
    LEAVE = "leave"
    THOUGHT = "thought"
    BROWSE = "browse"
    BROWSE_ACTION = "browse_action"
    READ = "read"
    WRITE = "write"
    RUN = "run"

    def __str__(self) -> str:
        """Return the raw string value of the member (e.g. ``"speak"``)."""
        return self.value

    def __eq__(self, other: object) -> bool:
        """Compare by value against another ``ActionType`` or a plain ``str``.

        Returns ``NotImplemented`` for any other operand type so Python can
        fall back to its default comparison.
        """
        if isinstance(other, ActionType):
            return self.value == other.value
        if isinstance(other, str):
            return self.value == other
        return NotImplemented

    def __hash__(self) -> int:
        """Hash by value so that objects comparing equal also hash equal.

        A class that defines ``__eq__`` without ``__hash__`` has its
        ``__hash__`` implicitly set to ``None``; without this method the
        members would be unhashable and could not be used as dict keys.
        Hashing the value (rather than the name) keeps the hash consistent
        with the string equality implemented in ``__eq__``.
        """
        return hash(self.value)
@DataModelFactory.register("agent_action")
class AgentAction(DataModel):
    """A single action emitted by an agent, registered as ``agent_action``.

    ``action_type`` selects the kind of action and ``argument`` carries its
    payload (the utterance, gesture, command, ...). ``path`` is optional and
    defaults to ``None`` so that actions which do not involve a file can be
    constructed and validated without supplying it.
    """

    agent_name: str = Field(description="the name of the agent")
    action_type: ActionType = Field(
        description="whether to speak at this turn or choose to not do anything"
    )
    argument: str = Field(
        description="the utterance if choose to speak, the expression or gesture if choose non-verbal communication, or the physical action if choose action"
    )
    # An explicit default is required: an Optional annotation alone does not
    # make a pydantic v2 field optional, it only allows None as a value.
    path: Optional[str] = Field(default=None, description="path of file")

    def to_natural_language(self) -> str:
        """Render the action as a short English phrase.

        Returns:
            The phrase for this action type, or ``"performed an unknown
            action"`` when the type has no entry in the table below.
        """
        # Keyed by the enum's string value rather than by the member itself,
        # so the lookup does not depend on ActionType members being hashable.
        # NOTE(review): BROWSE_ACTION has no entry and therefore falls through
        # to the default phrase - confirm whether that is intended.
        phrases = {
            ActionType.NONE.value: "did nothing",
            ActionType.SPEAK.value: f'said: "{self.argument}"',
            ActionType.THOUGHT.value: f'thought: "{self.argument}"',
            ActionType.BROWSE.value: f'browsed: "{self.argument}"',
            ActionType.RUN.value: f'ran: "{self.argument}"',
            ActionType.READ.value: f'read: "{self.argument}"',
            ActionType.WRITE.value: f'wrote: "{self.argument}"',
            ActionType.NON_VERBAL.value: f"[{self.action_type.value}] {self.argument}",
            ActionType.LEAVE.value: "left the conversation",
        }

        return phrases.get(self.action_type.value, "performed an unknown action")
-> None: + if message.action_type == "speak": + await self.r.publish( + self.output_channel, + Message[AgentAction](data=message).model_dump_json(), + ) + + elif message.action_type in ("browse", "browse_action", "write", "read", "run"): + await self.r.publish( + "Agent:Runtime", + Message[AgentAction](data=message).model_dump_json(), + ) + + def _format_message_history( + self, message_history: list[tuple[str, str, str]] + ) -> str: + ## TODO: akhatua Fix the mapping of action to be gramatically correct + return "\n".join( + (f"{speaker} {action} {message}") + for speaker, action, message in message_history + ) + + def get_action_template(self, selected_actions: list[ActionType]) -> str: + """ + Returns the action template string with selected actions. + + Args: + selected_actions (list[ActionType]): List of ActionType enum members to include in the template. + + Returns: + str: The action template with the selected actions. + """ + base_template = """ You are talking to another agent. + You are {agent_name}.\n + {message_history}\nand you plan to {goal}. + ## Action + What is your next thought or action? Your response must be in JSON format. + + It must be an object, and it must contain two fields: + * `action`, which is one of the actions below + * `args`, which is a map of key-value pairs, specifying the arguments for that action + """ + + action_descriptions = { + str( + ActionType.SPEAK + ): """`speak` - you can talk to the other agents to share information or ask them something. Arguments: + * `content` - the message to send to the other agents (should be short)""", + str( + ActionType.THOUGHT + ): """`thought` - only use this rarely to make a plan, set a goal, record your thoughts. Arguments: + * `content` - the message you send yourself to organize your thoughts (should be short). 
You cannot think more than 2 turns.""", + str( + ActionType.NONE + ): """`none` - you can choose not to take an action if you are waiting for some data""", + str( + ActionType.NON_VERBAL + ): """`non-verbal` - you can choose to do a non verbal action + * `content` - the non veral action you want to send to other agents. eg: smile, shrug, thumbs up""", + str(ActionType.BROWSE): """`browse` - opens a web page. Arguments: + * `url` - the URL to open, when you browse the web you must use `none` action until you get some information back. When you get the information back you must summarize the article and explain the article to the other agents.""", + str( + ActionType.BROWSE_ACTION + ): """`browse_action` - actions you can take on a web browser + * `command` - the command to run. You have 15 available commands. These commands must be a single string value of command + Options for `command`: + `command` = goto(url: str) + Description: Navigate to a url. + Examples: + goto('http://www.example.com') + + `command` = go_back() + Description: Navigate to the previous page in history. + Examples: + go_back() + + `command` = go_forward() + Description: Navigate to the next page in history. + Examples: + go_forward() + + `command` = noop(wait_ms: float = 1000) + Description: Do nothing, and optionally wait for the given time (in milliseconds). + You can use this to get the current page content and/or wait for the page to load. + Examples: + noop() + noop(500) + + `command` = scroll(delta_x: float, delta_y: float) + Description: Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event. + Examples: + scroll(0, 200) + scroll(-50.2, -100.5) + + `command` = fill(bid, value) + Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for ,