diff --git a/agenthub/codeact_agent/__init__.py b/agenthub/codeact_agent/__init__.py
index 8b925723b623..90ee2bf40da1 100644
--- a/agenthub/codeact_agent/__init__.py
+++ b/agenthub/codeact_agent/__init__.py
@@ -2,12 +2,21 @@
import re
from litellm import completion
from termcolor import colored
-from typing import List, Dict
+from typing import List, Mapping
+
+from opendevin.agent import Agent
+from opendevin.state import State
+from opendevin.action import (
+ Action,
+ CmdRunAction,
+ AgentEchoAction,
+ AgentFinishAction,
+)
+from opendevin.observation import (
+ CmdOutputObservation,
+ AgentMessageObservation,
+)
-from opendevin.agent import Agent, Message, Role
-from opendevin.lib.event import Event
-from opendevin.lib.command_manager import CommandManager
-from opendevin.sandbox.sandbox import DockerInteractive
assert (
"OPENAI_API_KEY" in os.environ
@@ -53,9 +62,7 @@ def parse_response(response) -> str:
class CodeActAgent(Agent):
def __init__(
self,
- instruction: str,
- workspace_dir: str,
- max_steps: int = 100
+ model_name: str
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
@@ -64,69 +71,68 @@ def __init__(
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
- super().__init__(instruction, workspace_dir, max_steps)
- self._history = [Message(Role.SYSTEM, SYSTEM_MESSAGE)]
- self._history.append(Message(Role.USER, instruction))
- self.env = DockerInteractive(workspace_dir=workspace_dir)
- print(colored("===USER:===\n" + instruction, "green"))
-
- def _history_to_messages(self) -> List[Dict]:
- return [message.to_dict() for message in self._history]
-
- def run(self) -> None:
- """
- Starts the execution of the assigned instruction. This method should
- be implemented by subclasses to define the specific execution logic.
- """
- for _ in range(self.max_steps):
- response = completion(
- messages=self._history_to_messages(),
- model=self.model_name,
- stop=[""],
- temperature=0.0,
- seed=42,
- )
- action = parse_response(response)
- self._history.append(Message(Role.ASSISTANT, action))
- print(colored("===ASSISTANT:===\n" + action, "yellow"))
-
- command = re.search(r"(.*)", action, re.DOTALL)
- if command is not None:
- # a command was found
- command_group = command.group(1)
- if command_group.strip() == "exit":
- print(colored("Exit received. Exiting...", "red"))
- break
- # execute the code
- # TODO: does exit_code get loaded into Message?
- exit_code, observation = self.env.execute(command_group)
- self._history.append(Message(Role.ASSISTANT, observation))
- print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
- else:
- # we could provide a error message for the model to continue similar to
- # https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
- observation = INVALID_INPUT_MESSAGE
- self._history.append(Message(Role.ASSISTANT, observation))
- print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
-
- self.env.close()
-
- def chat(self, message: str) -> None:
- """
- Optional method for interactive communication with the agent during its execution. Implementations
- can use this method to modify the agent's behavior or state based on chat inputs.
+ super().__init__(model_name)
+ self.messages: List[Mapping[str, str]] = []
+ self.instruction: str = ""
+
+ def step(self, state: State) -> Action:
+ if len(self.messages) == 0:
+ assert self.instruction, "Expecting instruction to be set"
+ self.messages = [
+ {"role": "system", "content": SYSTEM_MESSAGE},
+ {"role": "user", "content": self.instruction},
+ ]
+ print(colored("===USER:===\n" + self.instruction, "green"))
+
+ updated_info = state.updated_info
+
+ if updated_info:
+ for prev_action, obs in updated_info:
+ assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
+
+ if isinstance(obs, AgentMessageObservation): # warning message from itself
+ self.messages.append({"role": "user", "content": obs.content})
+ print(colored("===USER:===\n" + obs.content, "green"))
+ elif isinstance(obs, CmdOutputObservation):
+ content = "OBSERVATION:\n" + obs.content
+ content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]"  # single closing bracket
+ self.messages.append({"role": "user", "content": content})
+ print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
+ else:
+ raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
+
+ response = completion(
+ messages=self.messages,
+ model=self.model_name,
+ stop=[""],
+ temperature=0.0,
+ seed=42,
+ )
+ action_str: str = parse_response(response)
+ self.messages.append({"role": "assistant", "content": action_str})
+ print(colored("===ASSISTANT:===\n" + action_str, "yellow"))
+
+ command = re.search(r"(.*)", action_str, re.DOTALL)
+ if command is not None:
+ # a command was found
+ command_group = command.group(1)
+ if command_group.strip() == "exit":
+ print(colored("Exit received. Exiting...", "red"))
+ return AgentFinishAction()
+ return CmdRunAction(command = command_group)
+ # # execute the code
+ # # TODO: does exit_code get loaded into Message?
+ # exit_code, observation = self.env.execute(command_group)
+ # self._history.append(Message(Role.ASSISTANT, observation))
+ # print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
+ else:
+ # we could provide an error message for the model to continue similar to
+ # https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
+ # observation = INVALID_INPUT_MESSAGE
+ # self._history.append(Message(Role.ASSISTANT, observation))
+ # print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
+ return AgentEchoAction(content=INVALID_INPUT_MESSAGE) # warning message to itself
- Parameters:
- - message (str): The chat message or command.
- """
- raise NotImplementedError
-
- # TODO: implement these abstract methods
- def add_event(self, event: Event) -> None:
- raise NotImplementedError("Implement this abstract method")
-
- def step(self, cmd_mgr: CommandManager) -> Event:
- raise NotImplementedError("Implement this abstract method")
def search_memory(self, query: str) -> List[str]:
raise NotImplementedError("Implement this abstract method")
diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py
index b1a06b452689..5546eaf33e44 100644
--- a/agenthub/langchains_agent/__init__.py
+++ b/agenthub/langchains_agent/__init__.py
@@ -1,8 +1,28 @@
-from typing import List, Any
+from typing import List, Dict, Type
+import agenthub.langchains_agent.utils.llm as llm
from opendevin.agent import Agent
-from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
-from opendevin.lib.event import Event
+from opendevin.action import (
+ Action,
+ CmdRunAction,
+ CmdKillAction,
+ BrowseURLAction,
+ FileReadAction,
+ FileWriteAction,
+ AgentRecallAction,
+ AgentThinkAction,
+ AgentFinishAction,
+)
+from opendevin.observation import (
+ Observation,
+ CmdOutputObservation,
+ BrowserOutputObservation,
+)
+from opendevin.state import State
+
+from agenthub.langchains_agent.utils.monologue import Monologue
+from agenthub.langchains_agent.utils.memory import LongTermMemory
+
INITIAL_THOUGHTS = [
"I exist!",
@@ -43,59 +63,135 @@
]
+MAX_OUTPUT_LENGTH = 5000
+MAX_MONOLOGUE_LENGTH = 20000
+
+
+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+ "run": CmdRunAction,
+ "kill": CmdKillAction,
+ "browse": BrowseURLAction,
+ "read": FileReadAction,
+ "write": FileWriteAction,
+ "recall": AgentRecallAction,
+ "think": AgentThinkAction,
+ "finish": AgentFinishAction,
+}
+
+CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
+
class LangchainsAgent(Agent):
_initialized = False
- agent: Any = None
+
+ def __init__(self, model_name: str):
+ super().__init__(model_name)
+ self.monologue = Monologue(self.model_name)
+ self.memory = LongTermMemory()
+
+ def _add_event(self, event: dict):
+ if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
+ event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
+
+ self.monologue.add_event(event)
+ self.memory.add_event(event)
+ if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
+ self.monologue.condense()
def _initialize(self):
if self._initialized:
return
+
if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
- self.agent = LangchainsAgentImpl(self.instruction, self.model_name)
+
next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
if next_is_output:
- event = Event("output", {"output": thought})
+ d = {"action": "output", "args": {"output": thought}}
next_is_output = False
else:
if thought.startswith("RUN"):
command = thought.split("RUN ")[1]
- event = Event("run", {"command": command})
+ d = {"action": "run", "args": {"command": command}}
next_is_output = True
+
elif thought.startswith("RECALL"):
query = thought.split("RECALL ")[1]
- event = Event("recall", {"query": query})
+ d = {"action": "recall", "args": {"query": query}}
next_is_output = True
+
elif thought.startswith("BROWSE"):
url = thought.split("BROWSE ")[1]
- event = Event("browse", {"url": url})
+ d = {"action": "browse", "args": {"url": url}}
next_is_output = True
else:
- event = Event("think", {"thought": thought})
- self.agent.add_event(event)
- self._initialized = True
+ d = {"action": "think", "args": {"thought": thought}}
- def add_event(self, event: Event) -> None:
- if self.agent:
- self.agent.add_event(event)
+ self._add_event(d)
+ self._initialized = True
- def step(self, cmd_mgr) -> Event:
+ def step(self, state: State) -> Action:
self._initialize()
- return self.agent.get_next_action(cmd_mgr)
+ # TODO: make langchains agent use Action & Observation
+ # completely from the ground up
- def search_memory(self, query: str) -> List[str]:
- return self.agent.memory.search(query)
+ # Translate state to action_dict
+ for prev_action, obs in state.updated_info:
+ if isinstance(obs, CmdOutputObservation):
+ if obs.error:
+ d = {"action": "error", "args": {"output": obs.content}}
+ else:
+ d = {"action": "output", "args": {"output": obs.content}}
+ # elif isinstance(obs, UserMessageObservation):
+ # d = {"action": "output", "args": {"output": obs.message}}
+ # elif isinstance(obs, AgentMessageObservation):
+ # d = {"action": "output", "args": {"output": obs.message}}
+ elif isinstance(obs, (BrowserOutputObservation, Observation)):
+ d = {"action": "output", "args": {"output": obs.content}}
+ else:
+ raise NotImplementedError(f"Unknown observation type: {obs}")
+ self._add_event(d)
- def chat(self, message: str) -> None:
- """
- Optional method for interactive communication with the agent during its execution. Implementations
- can use this method to modify the agent's behavior or state based on chat inputs.
- Parameters:
- - message (str): The chat message or command.
- """
- raise NotImplementedError
+ if isinstance(prev_action, CmdRunAction):
+ d = {"action": "run", "args": {"command": prev_action.command}}
+ elif isinstance(prev_action, CmdKillAction):
+ d = {"action": "kill", "args": {"id": prev_action.id}}
+ elif isinstance(prev_action, BrowseURLAction):
+ d = {"action": "browse", "args": {"url": prev_action.url}}
+ elif isinstance(prev_action, FileReadAction):
+ d = {"action": "read", "args": {"file": prev_action.path}}
+ elif isinstance(prev_action, FileWriteAction):
+ d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
+ elif isinstance(prev_action, AgentRecallAction):
+ d = {"action": "recall", "args": {"query": prev_action.query}}
+ elif isinstance(prev_action, AgentThinkAction):
+ d = {"action": "think", "args": {"thought": prev_action.thought}}
+ elif isinstance(prev_action, AgentFinishAction):
+ d = {"action": "finish", "args": {}}  # _add_event indexes event['args'], so the key must exist
+ else:
+ raise NotImplementedError(f"Unknown action type: {prev_action}")
+ self._add_event(d)
+
+ state.updated_info = []
+
+ action_dict = llm.request_action(
+ self.instruction,
+ self.monologue.get_thoughts(),
+ self.model_name,
+ state.background_commands_obs,
+ )
+ if action_dict is None:
+ action_dict = {"action": "think", "args": {"thought": "..."}}
+
+ # Translate action_dict to Action
+ action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict.get("args", {}))  # tolerate argument-less actions that omit "args"
+ self.latest_action = action
+ return action
+
+ def search_memory(self, query: str) -> List[str]:
+ return self.memory.search(query)
+
Agent.register("LangchainsAgent", LangchainsAgent)
diff --git a/agenthub/langchains_agent/utils/agent.py b/agenthub/langchains_agent/utils/agent.py
deleted file mode 100644
index eab6caee7f9b..000000000000
--- a/agenthub/langchains_agent/utils/agent.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from agenthub.langchains_agent.utils.monologue import Monologue
-from agenthub.langchains_agent.utils.memory import LongTermMemory
-from opendevin.lib.event import Event
-import agenthub.langchains_agent.utils.llm as llm
-
-MAX_OUTPUT_LENGTH = 5000
-MAX_MONOLOGUE_LENGTH = 20000
-
-class Agent:
- def __init__(self, task, model_name):
- self.task = task
- self.model_name = model_name
- self.monologue = Monologue(model_name)
- self.memory = LongTermMemory()
-
- def add_event(self, event):
- if 'output' in event.args and len(event.args['output']) > MAX_OUTPUT_LENGTH:
- event.args['output'] = event.args['output'][:MAX_OUTPUT_LENGTH] + "..."
- self.monologue.add_event(event)
- self.memory.add_event(event)
- if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
- self.monologue.condense()
-
- def get_next_action(self, cmd_mgr):
- action_dict = llm.request_action(
- self.task,
- self.monologue.get_thoughts(),
- self.model_name,
- cmd_mgr.background_commands
- )
- if action_dict is None:
- # TODO: this seems to happen if the LLM response isn't valid JSON. Maybe it should be an `error` instead? How should we handle this case?
- return Event('think', {'thought': '...'})
- event = Event(action_dict['action'], action_dict['args'])
- self.latest_action = event
- return event
-
diff --git a/agenthub/langchains_agent/utils/llm.py b/agenthub/langchains_agent/utils/llm.py
index 98475e3cedaf..d3885ad67fe4 100644
--- a/agenthub/langchains_agent/utils/llm.py
+++ b/agenthub/langchains_agent/utils/llm.py
@@ -4,11 +4,16 @@
if os.getenv("DEBUG"):
from langchain.globals import set_debug
+
set_debug(True)
from typing import List
from langchain_core.pydantic_v1 import BaseModel
+from opendevin.observation import (
+ CmdOutputObservation,
+)
+
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
@@ -88,60 +93,73 @@
You can also use the same action and args from the source monologue.
"""
-class Action(BaseModel):
+
+class _ActionDict(BaseModel):
action: str
args: dict
+
class NewMonologue(BaseModel):
- new_monologue: List[Action]
+ new_monologue: List[_ActionDict]
+
def get_chain(template, model_name):
- assert "OPENAI_API_KEY" in os.environ, "Please set the OPENAI_API_KEY environment variable to use langchains_agent."
- llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name)
+ assert (
+ "OPENAI_API_KEY" in os.environ
+ ), "Please set the OPENAI_API_KEY environment variable to use langchains_agent."
+ llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name) # type: ignore
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(prompt=prompt, llm=llm)
return llm_chain
-def summarize_monologue(thoughts, model_name):
+
+def summarize_monologue(thoughts: List[dict], model_name):
llm_chain = get_chain(MONOLOGUE_SUMMARY_PROMPT, model_name)
parser = JsonOutputParser(pydantic_object=NewMonologue)
- resp = llm_chain.invoke({'monologue': json.dumps({'old_monologue': thoughts})})
+ resp = llm_chain.invoke({"monologue": json.dumps({"old_monologue": thoughts})})
+
if os.getenv("DEBUG"):
print("resp", resp)
- parsed = parser.parse(resp['text'])
- return parsed['new_monologue']
+ parsed = parser.parse(resp["text"])
+ return parsed["new_monologue"]
+
-def request_action(task, thoughts, model_name, background_commands=[]):
+def request_action(
+ task,
+ thoughts: List[dict],
+ model_name: str,
+ background_commands_obs: List[CmdOutputObservation] = [],
+):
llm_chain = get_chain(ACTION_PROMPT, model_name)
- parser = JsonOutputParser(pydantic_object=Action)
- hint = ''
+ parser = JsonOutputParser(pydantic_object=_ActionDict)
+ hint = ""
if len(thoughts) > 0:
latest_thought = thoughts[-1]
- if latest_thought.action == 'think':
- if latest_thought.args['thought'].startswith("OK so my task is"):
+ if latest_thought["action"] == 'think':
+ if latest_thought["args"]['thought'].startswith("OK so my task is"):
hint = "You're just getting started! What should you do first?"
else:
hint = "You've been thinking a lot lately. Maybe it's time to take action?"
- elif latest_thought.action == 'error':
+ elif latest_thought["action"] == 'error':
hint = "Looks like that last command failed. Maybe you need to fix it, or try something else."
bg_commands_message = ""
- if len(background_commands) > 0:
+ if len(background_commands_obs) > 0:
bg_commands_message = "The following commands are running in the background:"
- for id, command in background_commands.items():
- bg_commands_message += f"\n`{id}`: {command.command}"
+ for command_obs in background_commands_obs:
+ bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
latest_thought = thoughts[-1]
- resp = llm_chain.invoke({
- "monologue": json.dumps(thoughts),
- "hint": hint,
- "task": task,
- "background_commands": bg_commands_message,
- })
+ resp = llm_chain.invoke(
+ {
+ "monologue": json.dumps(thoughts),
+ "hint": hint,
+ "task": task,
+ "background_commands": bg_commands_message,
+ }
+ )
if os.getenv("DEBUG"):
print("resp", resp)
- parsed = parser.parse(resp['text'])
+ parsed = parser.parse(resp["text"])
return parsed
-
-
diff --git a/agenthub/langchains_agent/utils/memory.py b/agenthub/langchains_agent/utils/memory.py
index c73bc613e95d..0611a5aac6dd 100644
--- a/agenthub/langchains_agent/utils/memory.py
+++ b/agenthub/langchains_agent/utils/memory.py
@@ -18,9 +18,9 @@ def __init__(self):
def add_event(self, event):
doc = Document(
text=json.dumps(event),
- doc_id=self.thought_idx,
+ doc_id=str(self.thought_idx),
extra_info={
- "type": event.action,
+ "type": event["action"],
"idx": self.thought_idx,
},
)
diff --git a/agenthub/langchains_agent/utils/monologue.py b/agenthub/langchains_agent/utils/monologue.py
index 53ca081c5447..c9de0c79ffe3 100644
--- a/agenthub/langchains_agent/utils/monologue.py
+++ b/agenthub/langchains_agent/utils/monologue.py
@@ -1,6 +1,4 @@
import agenthub.langchains_agent.utils.json as json
-from opendevin.lib.event import Event
-
import agenthub.langchains_agent.utils.llm as llm
class Monologue:
@@ -8,7 +6,7 @@ def __init__(self, model_name):
self.thoughts = []
self.model_name = model_name
- def add_event(self, t):
+ def add_event(self, t: dict):
self.thoughts.append(t)
def get_thoughts(self):
@@ -19,6 +17,7 @@ def get_total_length(self):
def condense(self):
new_thoughts = llm.summarize_monologue(self.thoughts, self.model_name)
- self.thoughts = [Event(t['action'], t['args']) for t in new_thoughts]
+ # self.thoughts = [Event(t['action'], t['args']) for t in new_thoughts]
+ self.thoughts = new_thoughts
diff --git a/opendevin/action/__init__.py b/opendevin/action/__init__.py
new file mode 100644
index 000000000000..814842cac5eb
--- /dev/null
+++ b/opendevin/action/__init__.py
@@ -0,0 +1,19 @@
+from .base import Action, NullAction
+from .bash import CmdRunAction, CmdKillAction
+from .browse import BrowseURLAction
+from .fileop import FileReadAction, FileWriteAction
+from .agent import AgentRecallAction, AgentThinkAction, AgentFinishAction, AgentEchoAction
+
+__all__ = [
+ "Action",
+ "NullAction",
+ "CmdRunAction",
+ "CmdKillAction",
+ "BrowseURLAction",
+ "FileReadAction",
+ "FileWriteAction",
+ "AgentRecallAction",
+ "AgentThinkAction",
+ "AgentFinishAction",
+ "AgentEchoAction",
+]
diff --git a/opendevin/action/agent.py b/opendevin/action/agent.py
new file mode 100644
index 000000000000..48463dbb88e9
--- /dev/null
+++ b/opendevin/action/agent.py
@@ -0,0 +1,58 @@
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from opendevin.observation import AgentRecallObservation, AgentMessageObservation, Observation
+from .base import ExecutableAction, NotExecutableAction
+if TYPE_CHECKING:
+ from opendevin.controller import AgentController
+
+
+@dataclass
+class AgentRecallAction(ExecutableAction):
+ query: str
+
+ def run(self, controller: "AgentController") -> AgentRecallObservation:
+ return AgentRecallObservation(
+ content="Recalling memories...",
+ memories=controller.agent.search_memory(self.query)
+ )
+
+ @property
+ def message(self) -> str:
+ return f"Recalling memories with query: {self.query}"
+
+
+@dataclass
+class AgentThinkAction(NotExecutableAction):
+ thought: str
+ runnable: bool = False
+
+ def run(self, controller: "AgentController") -> "Observation":
+ raise NotImplementedError
+
+ @property
+ def message(self) -> str:
+ return f"Thinking: {self.thought}"
+
+@dataclass
+class AgentEchoAction(ExecutableAction):
+ content: str
+ runnable: bool = True
+
+ def run(self, controller: "AgentController") -> "Observation":
+ return AgentMessageObservation(self.content)
+
+ @property
+ def message(self) -> str:
+ return f"Echoing: {self.content}"
+
+@dataclass
+class AgentFinishAction(NotExecutableAction):
+ runnable: bool = False
+
+ def run(self, controller: "AgentController") -> "Observation":
+ raise NotImplementedError
+
+ @property
+ def message(self) -> str:
+ return "Finished!"
diff --git a/opendevin/action/base.py b/opendevin/action/base.py
new file mode 100644
index 000000000000..4932a43f53e8
--- /dev/null
+++ b/opendevin/action/base.py
@@ -0,0 +1,45 @@
+from dataclasses import dataclass
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from opendevin.controller import AgentController
+ from opendevin.observation import Observation
+
+@dataclass
+class Action:
+ def run(self, controller: "AgentController") -> "Observation":
+ raise NotImplementedError
+
+ def to_dict(self):
+ return {"action": self.__class__.__name__, "args": self.__dict__, "message": self.message}
+
+ @property
+ def executable(self) -> bool:
+ raise NotImplementedError
+
+ @property
+ def message(self) -> str:
+ raise NotImplementedError
+
+
+
+class ExecutableAction(Action):
+ @property
+ def executable(self) -> bool:
+ return True
+
+
+class NotExecutableAction(Action):
+ @property
+ def executable(self) -> bool:
+ return False
+
+class NullAction(NotExecutableAction):
+ """An action that does nothing.
+ This is used when the agent needs to receive user follow-up messages from the frontend.
+ """
+
+ @property
+ def message(self) -> str:
+ return "No action"
diff --git a/opendevin/action/bash.py b/opendevin/action/bash.py
new file mode 100644
index 000000000000..4342e970785c
--- /dev/null
+++ b/opendevin/action/bash.py
@@ -0,0 +1,32 @@
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from .base import ExecutableAction
+
+if TYPE_CHECKING:
+ from opendevin.controller import AgentController
+ from opendevin.observation import CmdOutputObservation
+
+
+@dataclass
+class CmdRunAction(ExecutableAction):
+ command: str
+ background: bool = False
+
+ def run(self, controller: "AgentController") -> "CmdOutputObservation":
+ return controller.command_manager.run_command(self.command, self.background)
+
+ @property
+ def message(self) -> str:
+ return f"Running command: {self.command}"
+
+@dataclass
+class CmdKillAction(ExecutableAction):
+ id: int
+
+ def run(self, controller: "AgentController") -> "CmdOutputObservation":
+ return controller.command_manager.kill_command(self.id)
+
+ @property
+ def message(self) -> str:
+ return f"Killing command: {self.id}"
diff --git a/opendevin/action/browse.py b/opendevin/action/browse.py
new file mode 100644
index 000000000000..6c9bfd8bf915
--- /dev/null
+++ b/opendevin/action/browse.py
@@ -0,0 +1,21 @@
+import requests
+
+from dataclasses import dataclass
+from opendevin.observation import BrowserOutputObservation
+
+from .base import ExecutableAction
+
+@dataclass
+class BrowseURLAction(ExecutableAction):
+ url: str
+
+ def run(self, *args, **kwargs) -> BrowserOutputObservation:
+ response = requests.get(self.url, timeout=30)  # bound the wait so a stalled server cannot hang the agent loop
+ return BrowserOutputObservation(
+ content=response.text,
+ url=self.url
+ )
+
+ @property
+ def message(self) -> str:
+ return f"Browsing URL: {self.url}"
diff --git a/opendevin/action/fileop.py b/opendevin/action/fileop.py
new file mode 100644
index 000000000000..77accb8e77bb
--- /dev/null
+++ b/opendevin/action/fileop.py
@@ -0,0 +1,47 @@
+import os
+from dataclasses import dataclass
+
+from opendevin.observation import Observation
+from .base import ExecutableAction
+
+# This is the path where the workspace is mounted in the container
+# The LLM sometimes returns paths with this prefix, so we need to remove it
+PATH_PREFIX = "/workspace/"
+
+def resolve_path(base_path, file_path):
+ if file_path.startswith(PATH_PREFIX):
+ file_path = file_path[len(PATH_PREFIX):]
+ return os.path.join(base_path, file_path)
+
+
+@dataclass
+class FileReadAction(ExecutableAction):
+ path: str
+ base_path: str = ""
+
+ def run(self, *args, **kwargs) -> Observation:
+ path = resolve_path(self.base_path, self.path)
+ with open(path, 'r') as file:
+ return Observation(file.read())
+
+ @property
+ def message(self) -> str:
+ return f"Reading file: {self.path}"
+
+
+@dataclass
+class FileWriteAction(ExecutableAction):
+ path: str
+ contents: str
+ base_path: str = ""
+
+ def run(self, *args, **kwargs) -> Observation:
+ path = resolve_path(self.base_path, self.path)
+ with open(path, 'w') as file:
+ file.write(self.contents)
+ return Observation(f"File written to {path}")
+
+ @property
+ def message(self) -> str:
+ return f"Writing file: {self.path}"
+
diff --git a/opendevin/agent.py b/opendevin/agent.py
index c6cf0bc1502b..09ede60cc7c2 100644
--- a/opendevin/agent.py
+++ b/opendevin/agent.py
@@ -1,40 +1,9 @@
from abc import ABC, abstractmethod
-from typing import List, Dict, Type
-from dataclasses import dataclass
-from enum import Enum
+from typing import List, Dict, Type, TYPE_CHECKING
-from .lib.event import Event
-from .lib.command_manager import CommandManager
-
-class Role(Enum):
- SYSTEM = "system" # system message for LLM
- USER = "user" # the user
- ASSISTANT = "assistant" # the agent
- ENVIRONMENT = "environment" # the environment (e.g., bash shell, web browser, etc.)
-
-@dataclass
-class Message:
- """
- This data class represents a message sent by an agent to another agent or user.
- """
-
- role: Role
- content: str
- # TODO: add more fields as needed
-
- def to_dict(self) -> Dict:
- """
- Converts the message to a dictionary (OpenAI chat-completion format).
-
- Returns:
- - message (Dict): A dictionary representation of the message.
- """
- role = self.role.value
- content = self.content
- if self.role == Role.ENVIRONMENT:
- content = f"Environment Observation:\n{content}"
- role = "user" # treat environment messages as user messages
- return {"role": role, "content": content}
+if TYPE_CHECKING:
+ from opendevin.action import Action
+ from opendevin.state import State
class Agent(ABC):
@@ -45,26 +14,15 @@ class Agent(ABC):
It tracks the execution status and maintains a history of interactions.
:param instruction: The instruction for the agent to execute.
- :param workspace_dir: The working directory for the agent.
:param model_name: The litellm name of the model to use for the agent.
- :param max_steps: The maximum number of steps to run the agent.
"""
- _registry: Dict[str, Type['Agent']] = {}
+ _registry: Dict[str, Type["Agent"]] = {}
- def __init__(
- self,
- workspace_dir: str,
- model_name: str,
- max_steps: int = 100
- ):
- self.instruction = ""
- self.workspace_dir = workspace_dir
+ def __init__(self, model_name: str):
self.model_name = model_name
- self.max_steps = max_steps
-
+ self.instruction: str = "" # need to be set before step
self._complete = False
- self._history: List[Message] = []
@property
def complete(self) -> bool:
@@ -76,28 +34,8 @@ def complete(self) -> bool:
"""
return self._complete
- @property
- def history(self) -> List[Message]:
- """
- Provides the history of interactions or state changes since the instruction was initiated.
-
- Returns:
- - history (List[str]): A list of strings representing the history.
- """
- return self._history
-
@abstractmethod
- def add_event(self, event: Event) -> None:
- """
- Adds an event to the agent's history.
-
- Parameters:
- - event (Event): The event to add to the history.
- """
- pass
-
- @abstractmethod
- def step(self, cmd_mgr: CommandManager) -> Event:
+ def step(self, state: "State") -> "Action":
"""
Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
@@ -123,12 +61,11 @@ def reset(self) -> None:
to prepare the agent for restarting the instruction or cleaning up before destruction.
"""
- self.instruction = ''
+ self.instruction = ""
self._complete = False
- self._history = []
@classmethod
- def register(cls, name: str, agent_cls: Type['Agent']):
+ def register(cls, name: str, agent_cls: Type["Agent"]):
"""
Registers an agent class in the registry.
@@ -141,7 +78,7 @@ def register(cls, name: str, agent_cls: Type['Agent']):
cls._registry[name] = agent_cls
@classmethod
- def get_cls(cls, name: str) -> Type['Agent']:
+ def get_cls(cls, name: str) -> Type["Agent"]:
"""
Retrieves an agent class from the registry.
diff --git a/opendevin/controller.py b/opendevin/controller.py
deleted file mode 100644
index 008440ed60aa..000000000000
--- a/opendevin/controller.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import asyncio
-
-from opendevin.lib.command_manager import CommandManager
-from opendevin.lib.event import Event
-
-def print_callback(event):
- print(event.str_truncated(), flush=True)
-
-class AgentController:
- def __init__(self, agent, workdir, max_iterations=100, callbacks=[]):
- self.agent = agent
- self.max_iterations = max_iterations
- self.background_commands = []
- self.command_manager = CommandManager(workdir)
- self.callbacks = callbacks
- self.callbacks.append(self.agent.add_event)
- self.callbacks.append(print_callback)
-
- async def add_user_event(self, event: Event):
- await self.handle_action(event)
-
- async def start_loop(self, task):
- try:
- self.agent.instruction = task
- for i in range(self.max_iterations):
- print("STEP", i, flush=True)
- done = await self.step()
- if done:
- print("FINISHED", flush=True)
- break
- except Exception as e:
- print("Error in loop", e, flush=True)
- pass
-
-
- async def step(self) -> bool:
- log_events = self.command_manager.get_background_events()
- for event in log_events:
- await self.run_callbacks(event)
-
- try:
- action_event = self.agent.step(self.command_manager)
- except Exception as e:
- action_event = Event('error', {'error': str(e)})
- if action_event is None:
- action_event = Event('error', {'error': "Agent did not return an event"})
-
- await self.handle_action(action_event)
- return action_event.action == 'finish'
-
- async def handle_action(self, event: Event):
- print("=== HANDLING EVENT ===", flush=True)
- await self.run_callbacks(event)
- print("--- EVENT OUTPUT ---", flush=True)
- output_event = event.run(self)
- await self.run_callbacks(output_event)
-
- async def run_callbacks(self, event):
- if event is None:
- return
- for callback in self.callbacks:
- idx = self.callbacks.index(callback)
- try:
- callback(event)
- except Exception as e:
- print("Callback error:" + str(idx), e, flush=True)
- pass
- await asyncio.sleep(0.001) # Give back control for a tick, so we can await in callbacks
diff --git a/opendevin/controller/__init__.py b/opendevin/controller/__init__.py
new file mode 100644
index 000000000000..f8a06c9b9a5e
--- /dev/null
+++ b/opendevin/controller/__init__.py
@@ -0,0 +1,120 @@
+import asyncio
+import traceback
+from typing import List, Callable, Optional, Tuple
+
+from opendevin.state import State
+from opendevin.agent import Agent
+from opendevin.action import (
+    Action,
+    NullAction,
+    FileReadAction,
+    FileWriteAction,
+    AgentFinishAction,
+)
+from opendevin.observation import (
+    Observation,
+    NullObservation
+)
+
+
+from .command_manager import CommandManager
+
+
+class AgentController:
+    """Runs an agent step by step and executes the actions it returns."""
+
+    def __init__(
+        self,
+        agent: Agent,
+        workdir: str,
+        max_iterations: int = 100,
+        callbacks: Optional[List[Callable]] = None,
+    ):
+        """
+        Parameters:
+        - agent (Agent): The agent whose `step` is called on every iteration.
+        - workdir (str): Workspace directory; given to the CommandManager and
+          injected as `base_path` into file read/write actions.
+        - max_iterations (int): Upper bound on the number of agent steps.
+        - callbacks (List[Callable]): Each one is called with every action
+          and every observation. Defaults to no callbacks.
+        """
+        self.agent = agent
+        self.max_iterations = max_iterations
+        self.workdir = workdir
+        self.command_manager = CommandManager(workdir)
+        # A `[]` default would be one list shared by every controller that is
+        # created without callbacks, so build a fresh list per instance.
+        self.callbacks = callbacks if callbacks is not None else []
+        self.state_updated_info: List[Tuple[Action, Observation]] = []
+
+    def get_current_state(self) -> State:
+        """
+        Builds the State for the next agent step from the background command
+        observations and the (action, observation) pairs recorded since the
+        previous call, then starts a new, empty list of pairs.
+        """
+        state = State(
+            background_commands_obs=self.command_manager.get_background_obs(),
+            updated_info=self.state_updated_info,
+        )
+        self.state_updated_info = []
+        return state
+
+    def add_observation(self, observation: Observation):
+        """Records an observation that has no originating action (paired with NullAction)."""
+        self.state_updated_info.append((NullAction(), observation))
+
+    async def start_loop(self, task_instruction: str):
+        """
+        Runs the agent on `task_instruction` until it returns an
+        AgentFinishAction or `max_iterations` steps have been taken.
+
+        An exception raised inside the loop ends it; the error is printed
+        together with its traceback and is not re-raised.
+        """
+        try:
+            self.agent.instruction = task_instruction
+            for i in range(self.max_iterations):
+                print("STEP", i, flush=True)
+
+                state: State = self.get_current_state()
+                action: Action = self.agent.step(state)
+
+                print("ACTION", action, flush=True)
+                for _callback_fn in self.callbacks:
+                    _callback_fn(action)
+
+                if isinstance(action, AgentFinishAction):
+                    print("FINISHED", flush=True)
+                    break
+                if isinstance(action, (FileReadAction, FileWriteAction)):
+                    # Rebuild the action with the workspace as its base path.
+                    # Copy the attributes first so the object already handed
+                    # to the callbacks is not modified behind their back.
+                    _kwargs = dict(action.__dict__)
+                    _kwargs["base_path"] = self.workdir
+                    action = action.__class__(**_kwargs)
+                    print(action, flush=True)
+                print("---", flush=True)
+
+                if action.executable:
+                    observation: Observation = action.run(self)
+                else:
+                    print("ACTION NOT EXECUTABLE", flush=True)
+                    observation = NullObservation("")
+                print("OBSERVATION", observation, flush=True)
+                self.state_updated_info.append((action, observation))
+
+                print(observation, flush=True)
+                for _callback_fn in self.callbacks:
+                    _callback_fn(observation)
+
+                print("==============", flush=True)
+
+                # Hand control back to the event loop between steps.
+                await asyncio.sleep(0.001)
+        except Exception as e:
+            # Best effort: report the failure and stop; do not propagate.
+            print("Error in loop", e, flush=True)
+            traceback.print_exc()
diff --git a/opendevin/controller/command_manager.py b/opendevin/controller/command_manager.py
new file mode 100644
index 000000000000..51a8cd0de91a
--- /dev/null
+++ b/opendevin/controller/command_manager.py
@@ -0,0 +1,74 @@
+from typing import List
+
+from opendevin.observation import CmdOutputObservation
+from opendevin.sandbox.sandbox import DockerInteractive
+
+
+class BackgroundCommand:
+    """A command started in the background inside its own DockerInteractive shell."""
+
+    def __init__(self, id: int, command: str, dir: str):
+        self.id = id
+        self.command = command
+        # One sandbox shell per background command, named after the command id.
+        self.shell = DockerInteractive(id=str(id), workspace_dir=dir)
+        self.shell.execute_in_background(command)
+
+    def get_logs(self) -> str:
+        """Returns whatever the shell's `read_logs()` reports for this command."""
+        # TODO: get an exit code if process is exited
+        return self.shell.read_logs()
+
+
+class CommandManager:
+    """Runs foreground commands in a default shell and tracks background ones by id."""
+
+    def __init__(self, dir):
+        self.directory = dir
+        self.cur_id = 0
+        self.background_commands = {}
+        self.shell = DockerInteractive(id="default", workspace_dir=dir)
+
+    def run_command(self, command: str, background=False) -> CmdOutputObservation:
+        """Runs `command`, in the background when `background` is truthy."""
+        runner = self._run_background if background else self._run_immediately
+        return runner(command)
+
+    def _run_immediately(self, command: str) -> CmdOutputObservation:
+        """Executes `command` in the default shell and wraps its exit code and output."""
+        status, text = self.shell.execute(command)
+        return CmdOutputObservation(
+            content=text,
+            command_id=self.cur_id,
+            command=command,
+            exit_code=status,
+        )
+
+    def _run_background(self, command: str) -> CmdOutputObservation:
+        """Starts `command` as a BackgroundCommand and registers it under its id."""
+        job = BackgroundCommand(self.cur_id, command, self.directory)
+        self.cur_id += 1
+        self.background_commands[job.id] = job
+        notice = f"Background command started. To stop it, send a `kill` action with id {job.id}"
+        return CmdOutputObservation(
+            content=notice,
+            command_id=job.id,
+            command=command,
+            exit_code=0,
+        )
+
+    def kill_command(self, id: int):
+        """Closes the shell of background command `id` and forgets the command."""
+        # TODO: get log events before killing
+        target = self.background_commands[id]
+        target.shell.close()
+        del self.background_commands[id]
+
+    def get_background_obs(self) -> List[CmdOutputObservation]:
+        """Returns one observation per tracked background command with its current logs."""
+        return [
+            CmdOutputObservation(
+                content=job.get_logs(), command_id=job_id, command=job.command
+            )
+            for job_id, job in self.background_commands.items()
+        ]
diff --git a/opendevin/lib/actions/__init__.py b/opendevin/lib/actions/__init__.py
deleted file mode 100644
index 34f5ce61a75c..000000000000
--- a/opendevin/lib/actions/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .browse import browse
-from .write import write
-from .read import read
-
-__all__ = ['run', 'kill', 'browse', 'write', 'read']
diff --git a/opendevin/lib/actions/browse.py b/opendevin/lib/actions/browse.py
deleted file mode 100644
index 033fe0fbb32b..000000000000
--- a/opendevin/lib/actions/browse.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import requests
-
-def browse(url):
- response = requests.get(url)
- return response.text
-
diff --git a/opendevin/lib/actions/read.py b/opendevin/lib/actions/read.py
deleted file mode 100644
index 09137533673a..000000000000
--- a/opendevin/lib/actions/read.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from .util import resolve_path
-
-def read(base_path, file_path):
- file_path = resolve_path(base_path, file_path)
- with open(file_path, 'r') as file:
- return file.read()
-
diff --git a/opendevin/lib/actions/util.py b/opendevin/lib/actions/util.py
deleted file mode 100644
index c8c10852f674..000000000000
--- a/opendevin/lib/actions/util.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import os
-
-# This is the path where the workspace is mounted in the container
-# The LLM sometimes returns paths with this prefix, so we need to remove it
-PATH_PREFIX = "/workspace/"
-
-def resolve_path(base_path, file_path):
- if file_path.startswith(PATH_PREFIX):
- file_path = file_path[len(PATH_PREFIX):]
- return os.path.join(base_path, file_path)
diff --git a/opendevin/lib/actions/write.py b/opendevin/lib/actions/write.py
deleted file mode 100644
index a8c879872de6..000000000000
--- a/opendevin/lib/actions/write.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from .util import resolve_path
-
-def write(base_path, file_path, contents):
- file_path = resolve_path(base_path, file_path)
- with open(file_path, 'w') as file:
- file.write(contents)
- return ""
-
diff --git a/opendevin/lib/command_manager.py b/opendevin/lib/command_manager.py
deleted file mode 100644
index b3273369f10c..000000000000
--- a/opendevin/lib/command_manager.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from typing import List
-
-from opendevin.lib.event import Event
-from opendevin.sandbox.sandbox import DockerInteractive
-
-class BackgroundCommand:
- def __init__(self, id: int, command: str, dir: str):
- self.command = command
- self.id = id
- self.shell = DockerInteractive(id=str(id), workspace_dir=dir)
- self.shell.execute_in_background(command)
-
- def get_logs(self):
- # TODO: get an exit code if process is exited
- return self.shell.read_logs()
-
-class CommandManager:
- def __init__(self, dir):
- self.cur_id = 0
- self.directory = dir
- self.background_commands = {}
- self.shell = DockerInteractive(id="default", workspace_dir=dir)
-
- def run_command(self, command: str, background=False) -> str:
- if background:
- return self.run_background(command)
- else:
- return self.run_immediately(command)
-
- def run_immediately(self, command: str) -> str:
- exit_code, output = self.shell.execute(command)
- if exit_code != 0:
- raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
- return output
-
- def run_background(self, command: str) -> str:
- bg_cmd = BackgroundCommand(self.cur_id, command, self.directory)
- self.cur_id += 1
- self.background_commands[bg_cmd.id] = bg_cmd
- return "Background command started. To stop it, send a `kill` action with id " + str(bg_cmd.id)
-
- def kill_command(self, id: int):
- # TODO: get log events before killing
- self.background_commands[id].shell.close()
- del self.background_commands[id]
-
- def get_background_events(self) -> List[Event]:
- events = []
- for id, cmd in self.background_commands.items():
- output = cmd.get_logs()
- events.append(Event('output', {
- 'output': output,
- 'id': id,
- 'command': cmd.command,
- }))
- return events
diff --git a/opendevin/lib/event.py b/opendevin/lib/event.py
deleted file mode 100644
index bf938c5b72a3..000000000000
--- a/opendevin/lib/event.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import opendevin.lib.actions as actions
-
-ACTION_TYPES = ['initialize', 'start', 'summarize', 'run', 'kill', 'browse', 'read', 'write', 'recall', 'think', 'output', 'error', 'finish']
-RUNNABLE_ACTIONS = ['run', 'kill', 'browse', 'read', 'write', 'recall']
-
-class Event:
- def __init__(self, action, args, message=None):
- if action not in ACTION_TYPES:
- raise ValueError('Invalid action type: ' + action)
- self.action = action
- self.args = args
- self.message = message
-
- def __str__(self):
- return self.action + " " + str(self.args)
-
- def str_truncated(self, max_len=1000):
- s = str(self)
- if len(s) > max_len:
- s = s[:max_len] + '...'
- return s
-
- def to_dict(self):
- return {
- 'action': self.action,
- 'args': self.args
- }
-
- def get_message(self) -> str:
- if self.message is not None:
- return self.message
- if self.action == 'run':
- return 'Running command: ' + self.args['command']
- elif self.action == 'kill':
- return 'Killing command: ' + self.args['id']
- elif self.action == 'browse':
- return 'Browsing: ' + self.args['url']
- elif self.action == 'read':
- return 'Reading file: ' + self.args['path']
- elif self.action == 'write':
- return 'Writing to file: ' + self.args['path']
- elif self.action == 'recall':
- return 'Recalling memory: ' + self.args['query']
- elif self.action == 'think':
- return self.args['thought']
- elif self.action == 'output':
- return "Got output."
- elif self.action == 'error':
- return "Got an error: " + self.args['output']
- elif self.action == 'finish':
- return "Finished!"
- else:
- return ""
-
- def is_runnable(self):
- return self.action in RUNNABLE_ACTIONS
-
- def run(self, agent_controller):
- if not self.is_runnable():
- return None
- action = 'output'
- try:
- output = self._run_and_get_output(agent_controller)
- except Exception as e:
- output = 'Error: ' + str(e)
- action = 'error'
- out_event = Event(action, {'output': output})
- return out_event
-
- def _run_and_get_output(self, agent_controller) -> str:
- if self.action == 'run':
- cmd = self.args['command']
- background = False
- if 'background' in self.args and self.args['background']:
- background = True
- return agent_controller.command_manager.run_command(cmd, background)
- if self.action == 'kill':
- id = self.args['id']
- return agent_controller.command_manager.kill_command(id)
- elif self.action == 'browse':
- url = self.args['url']
- return actions.browse(url)
- elif self.action == 'read':
- path = self.args['path']
- return actions.read(agent_controller.command_manager.directory, path)
- elif self.action == 'write':
- path = self.args['path']
- contents = self.args['contents']
- return actions.write(agent_controller.command_manager.directory, path, contents)
- elif self.action == 'recall':
- return agent_controller.agent.search_memory(self.args['query'])
- else:
- raise ValueError('Invalid action type')
diff --git a/opendevin/main.py b/opendevin/main.py
index 9fce18d69687..dfca536400c5 100644
--- a/opendevin/main.py
+++ b/opendevin/main.py
@@ -1,25 +1,54 @@
-from typing import Type
import asyncio
import argparse
+from typing import Type
+
import agenthub # noqa F401 (we import this to get the agents registered)
from opendevin.agent import Agent
from opendevin.controller import AgentController
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run an agent with a specific task")
- parser.add_argument("-d", "--directory", required=True, type=str, help="The working directory for the agent")
- parser.add_argument("-t", "--task", required=True, type=str, help="The task for the agent to perform")
- parser.add_argument("-c", "--agent-cls", default="LangchainsAgent", type=str, help="The agent class to use")
- parser.add_argument("-m", "--model-name", default="gpt-4-0125-preview", type=str, help="The (litellm) model name to use")
+ parser.add_argument(
+ "-d",
+ "--directory",
+ required=True,
+ type=str,
+ help="The working directory for the agent",
+ )
+ parser.add_argument(
+ "-t",
+ "--task",
+ required=True,
+ type=str,
+ help="The task for the agent to perform",
+ )
+ parser.add_argument(
+ "-c",
+ "--agent-cls",
+ default="LangchainsAgent",
+ type=str,
+ help="The agent class to use",
+ )
+ parser.add_argument(
+ "-m",
+ "--model-name",
+ default="gpt-4-0125-preview",
+ type=str,
+ help="The (litellm) model name to use",
+ )
+ parser.add_argument(
+ "-i",
+ "--max-iterations",
+ default=10,
+ type=int,
+ help="The maximum number of iterations to run the agent",
+ )
args = parser.parse_args()
print(f"Running agent {args.agent_cls} (model: {args.model_name}, directory: {args.directory}) with task: \"{args.task}\"")
AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
- agent = AgentCls(
- workspace_dir=args.directory,
- model_name=args.model_name
- )
- controller = AgentController(agent, args.directory)
+ agent = AgentCls(model_name=args.model_name)
+ controller = AgentController(agent, workdir=args.directory, max_iterations=args.max_iterations)
asyncio.run(controller.start_loop(args.task))
diff --git a/opendevin/observation.py b/opendevin/observation.py
new file mode 100644
index 000000000000..03909070810a
--- /dev/null
+++ b/opendevin/observation.py
@@ -0,0 +1,122 @@
+import copy
+from typing import List
+from dataclasses import dataclass
+
+
+@dataclass
+class Observation:
+    """
+    This data class represents an observation of the environment.
+    """
+
+    content: str
+
+    def __str__(self) -> str:
+        return self.content
+
+    def to_dict(self) -> dict:
+        """
+        Converts the observation to a dictionary.
+
+        The dictionary holds the class name ("observation"), the content,
+        a deep copy of all remaining fields ("extras") and the message.
+        """
+        extras = copy.deepcopy(self.__dict__)
+        extras.pop("content", None)
+        return {
+            "observation": self.__class__.__name__,
+            "content": self.content,
+            "extras": extras,
+            "message": self.message,
+        }
+
+    @property
+    def message(self) -> str:
+        """Returns a message describing the observation."""
+        return "The agent made an observation."
+
+
+@dataclass
+class CmdOutputObservation(Observation):
+    """
+    This data class represents the output of a command.
+    """
+
+    command_id: int
+    command: str
+    exit_code: int = 0
+
+    @property
+    def error(self) -> bool:
+        """True when the command exited with a non-zero exit code."""
+        return self.exit_code != 0
+
+    @property
+    def message(self) -> str:
+        return f'The agent observed command "{self.command}" executed with exit code {self.exit_code}.'
+
+
+@dataclass
+class BrowserOutputObservation(Observation):
+    """
+    This data class represents the output of a browser.
+    """
+
+    url: str
+
+    @property
+    def message(self) -> str:
+        # Name the observed page, as CmdOutputObservation names its command.
+        return f'The agent observed the browser output at URL "{self.url}".'
+
+
+@dataclass
+class UserMessageObservation(Observation):
+    """
+    This data class represents a message sent by the user.
+    """
+
+    role: str = "user"
+
+    @property
+    def message(self) -> str:
+        return "The agent received a message from the user."
+
+
+@dataclass
+class AgentMessageObservation(Observation):
+    """
+    This data class represents a message sent by the agent.
+    """
+
+    role: str = "assistant"
+
+    @property
+    def message(self) -> str:
+        return "The agent received a message from itself."
+
+
+@dataclass
+class AgentRecallObservation(Observation):
+    """
+    This data class represents a list of memories recalled by the agent.
+    """
+
+    memories: List[str]
+    role: str = "assistant"
+
+    @property
+    def message(self) -> str:
+        return "The agent recalled memories."
+
+
+@dataclass
+class NullObservation(Observation):
+    """
+    This data class represents a null observation.
+    This is used when the produced action is NOT executable.
+    """
+
+    @property
+    def message(self) -> str:
+        return ""
diff --git a/opendevin/sandbox/sandbox.py b/opendevin/sandbox/sandbox.py
index 97d5fdf86ff9..2b49bea19535 100644
--- a/opendevin/sandbox/sandbox.py
+++ b/opendevin/sandbox/sandbox.py
@@ -48,7 +48,13 @@ def __init__(
self.container_name = f"sandbox-{self.instance_id}"
self.restart_docker_container()
- self.execute('useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin && su devin')
+ uid = os.getuid()
+ exit_code, logs = self.container.exec_run([
+ '/bin/bash', '-c',
+ f'useradd --shell /bin/bash -u {uid} -o -c \"\" -m devin'
+ ],
+ workdir="/workspace"
+ )
# regester container cleanup function
atexit.register(self.cleanup)
@@ -70,12 +76,13 @@ def read_logs(self) -> str:
return logs
def execute(self, cmd: str) -> Tuple[int, str]:
- exit_code, logs = self.container.exec_run(['/bin/bash', '-c', cmd], workdir="/workspace")
+ # TODO: each execute is not stateful! We need to keep track of the current working directory
+ exit_code, logs = self.container.exec_run(['su', 'devin', '-c', cmd], workdir="/workspace")
return exit_code, logs.decode('utf-8')
def execute_in_background(self, cmd: str) -> None:
self.log_time = time.time()
- result = self.container.exec_run(['/bin/bash', '-c', cmd], socket=True, workdir="/workspace")
+ result = self.container.exec_run(['su', 'devin', '-c', cmd], socket=True, workdir="/workspace")
self.log_generator = result.output # socket.SocketIO
self.log_generator._sock.setblocking(0)
diff --git a/opendevin/server/session.py b/opendevin/server/session.py
index 5d67ccfc4038..dc6675dbc9dd 100644
--- a/opendevin/server/session.py
+++ b/opendevin/server/session.py
@@ -1,14 +1,42 @@
import os
import asyncio
-from typing import Optional
+from typing import Optional, Dict, Type
from fastapi import WebSocketDisconnect
from opendevin.agent import Agent
from opendevin.controller import AgentController
-from opendevin.lib.event import Event
-DEFAULT_WORKSPACE_DIR = os.getenv("WORKSPACE_DIR", os.getcwd())
+from opendevin.action import (
+ Action,
+ CmdRunAction,
+ CmdKillAction,
+ BrowseURLAction,
+ FileReadAction,
+ FileWriteAction,
+ AgentRecallAction,
+ AgentThinkAction,
+ AgentFinishAction,
+)
+from opendevin.observation import (
+ Observation,
+ UserMessageObservation
+)
+
+# NOTE: this is a temporary solution - but hopefully we can use Action/Observation throughout the codebase
+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+ "run": CmdRunAction,
+ "kill": CmdKillAction,
+ "browse": BrowseURLAction,
+ "read": FileReadAction,
+ "write": FileWriteAction,
+ "recall": AgentRecallAction,
+ "think": AgentThinkAction,
+ "finish": AgentFinishAction,
+}
+
+
+DEFAULT_WORKSPACE_DIR = os.getenv("WORKSPACE_DIR", os.path.join(os.getcwd(), "workspace"))
def parse_event(data):
if "action" not in data:
@@ -20,7 +48,11 @@ def parse_event(data):
message = None
if "message" in data:
message = data["message"]
- return Event(action, args, message)
+ return {
+ "action": action,
+ "args": args,
+ "message": message,
+ }
class Session:
def __init__(self, websocket):
@@ -57,15 +89,21 @@ async def start_listening(self):
if event is None:
await self.send_error("Invalid event")
continue
- if event.action == "initialize":
+ if event["action"] == "initialize":
await self.create_controller(event)
- elif event.action == "start":
+ elif event["action"] == "start":
await self.start_task(event)
else:
if self.controller is None:
await self.send_error("No agent started. Please wait a second...")
+
+ elif event["action"] == "chat":
+ self.controller.add_observation(UserMessageObservation(event["message"]))
else:
- await self.controller.add_user_event(event)
+ # TODO: we only need to implement user message for now
+ # since even Devin does not support having the user taking other
+ # actions (e.g., edit files) while the agent is running
+ raise NotImplementedError
except WebSocketDisconnect as e:
self.websocket = None
@@ -83,30 +121,28 @@ async def create_controller(self, start_event=None):
model = "gpt-4-0125-preview"
if start_event and "model" in start_event.args:
model = start_event.args["model"]
-
+
+ if not os.path.exists(directory):
+ print(f"Workspace directory {directory} does not exist. Creating it...")
+ os.makedirs(directory)
+ directory = os.path.relpath(directory, os.getcwd())
+
AgentCls = Agent.get_cls(agent_cls)
- self.agent = AgentCls(
- workspace_dir=directory,
- model_name=model,
- )
+ self.agent = AgentCls(model_name=model)
self.controller = AgentController(self.agent, directory, callbacks=[self.on_agent_event])
await self.send({"action": "initialize", "message": "Control loop started."})
async def start_task(self, start_event):
- if "task" not in start_event.args:
+ if "task" not in start_event["args"]:
await self.send_error("No task specified")
return
await self.send_message("Starting new task...")
- task = start_event.args["task"]
+ task = start_event["args"]["task"]
if self.controller is None:
await self.send_error("No agent started. Please wait a second...")
return
self.agent_task = asyncio.create_task(self.controller.start_loop(task), name="agent loop")
- def on_agent_event(self, event):
- evt = {
- "action": event.action,
- "message": event.get_message(),
- "args": event.args,
- }
- asyncio.create_task(self.send(evt), name="send event in callback")
+ def on_agent_event(self, event: Observation | Action):
+ event_dict = event.to_dict()
+ asyncio.create_task(self.send(event_dict), name="send event in callback")
diff --git a/opendevin/state.py b/opendevin/state.py
new file mode 100644
index 000000000000..d7a6b41ba793
--- /dev/null
+++ b/opendevin/state.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass
+from typing import List, Tuple
+
+from opendevin.action import (
+ Action,
+)
+from opendevin.observation import (
+ Observation,
+ CmdOutputObservation,
+)
+
+
+@dataclass
+class State:  # Snapshot the controller builds and passes to Agent.step()
+ background_commands_obs: List[CmdOutputObservation]  # one observation per background command currently tracked
+ updated_info: List[Tuple[Action, Observation]]  # (action, observation) pairs recorded since the previous snapshot