Refactoring: event stream based agent history (#2709)

* add to event stream sync * remove async from tests * small logging spam fix * remove swe agent * arch refactoring: use history from the event stream * refactor agents * monologue agent * ruff * planner agent * micro-agents * refactor history in evaluations * evals history refactoring * adapt evals and tests * unit testing stuck * testing micro agents, event stream * fix planner agent * fix tests * fix stuck after rename * fix test * small clean up * fix merge * fix merge issue * fix integration tests * Update agenthub/dummy_agent/agent.py * fix tests * rename more clearly; add todo; clean up
All-Hands-AI · Jul 7, 2024 · d37b297 · d37b297
1 parent 9dc2d2c
commit d37b297
Show file tree

Hide file tree

Showing 107 changed files with 1,670 additions and 676 deletions.
diff --git a/agenthub/browsing_agent/browsing_agent.py b/agenthub/browsing_agent/browsing_agent.py
@@ -15,6 +15,7 @@
 )
 from opendevin.events.event import EventSource
 from opendevin.events.observation import BrowserOutputObservation
+from opendevin.events.observation.observation import Observation
 from opendevin.llm.llm import LLM
 from opendevin.runtime.plugins import (
     PluginRequirement,
@@ -146,23 +147,21 @@ def step(self, state: State) -> Action:
         last_obs = None
         last_action = None
 
-        if EVAL_MODE and len(state.history) == 1:
+        if EVAL_MODE and len(state.history.get_events_as_list()) == 1:
             # for webarena and miniwob++ eval, we need to retrieve the initial observation already in browser env
             # initialize and retrieve the first observation by issuing an noop OP
             # For non-benchmark browsing, the browser env starts with a blank page, and the agent is expected to first navigate to desired websites
             return BrowseInteractiveAction(browser_actions='noop()')
 
-        for prev_action, obs in state.history:
-            if isinstance(prev_action, BrowseInteractiveAction):
-                prev_actions.append(prev_action.browser_actions)
-                last_obs = obs
-                last_action = prev_action
-            elif (
-                isinstance(prev_action, MessageAction)
-                and prev_action.source == EventSource.AGENT
-            ):
-                # agent has responded, task finish.
-                return AgentFinishAction(outputs={'content': prev_action.content})
+        for event in state.history.get_events():
+            if isinstance(event, BrowseInteractiveAction):
+                prev_actions.append(event.browser_actions)
+                last_action = event
+            elif isinstance(event, MessageAction) and event.source == EventSource.AGENT:
+                # agent has responded, task finished.
+                return AgentFinishAction(outputs={'content': event.content})
+            elif isinstance(event, Observation):
+                last_obs = event
 
         if EVAL_MODE:
             prev_actions = prev_actions[1:]  # remove the first noop action
@@ -207,7 +206,7 @@ def step(self, state: State) -> Action:
 
         prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str)
         messages.append({'role': 'user', 'content': prompt})
-        logger.info(prompt)
+        logger.debug(prompt)
         response = self.llm.completion(
             messages=messages,
             temperature=0.0,

diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py
@@ -182,27 +182,14 @@ def step(self, state: State) -> Action:
         - MessageAction(content) - Message action to run (e.g. ask for clarification)
         - AgentFinishAction() - end the interaction
         """
-        messages: list[dict[str, str]] = [
-            {'role': 'system', 'content': self.system_message},
-            {'role': 'user', 'content': self.in_context_example},
-        ]
 
-        for prev_action, obs in state.history:
-            action_message = get_action_message(prev_action)
-            if action_message:
-                messages.append(action_message)
+        # if we're done, go back
+        latest_user_message = state.history.get_last_user_message()
+        if latest_user_message and latest_user_message.strip() == '/exit':
+            return AgentFinishAction()
 
-            obs_message = get_observation_message(obs)
-            if obs_message:
-                messages.append(obs_message)
-
-        latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
-        if latest_user_message:
-            if latest_user_message['content'].strip() == '/exit':
-                return AgentFinishAction()
-            latest_user_message['content'] += (
-                f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
-            )
+        # prepare what we want to send to the LLM
+        messages: list[dict[str, str]] = self._get_messages(state)
 
         response = self.llm.completion(
             messages=messages,
@@ -217,3 +204,35 @@ def step(self, state: State) -> Action:
 
     def search_memory(self, query: str) -> list[str]:
         raise NotImplementedError('Implement this abstract method')
+
+    def _get_messages(self, state: State) -> list[dict[str, str]]:
+        messages = [
+            {'role': 'system', 'content': self.system_message},
+            {'role': 'user', 'content': self.in_context_example},
+        ]
+
+        for event in state.history.get_events():
+            # create a regular message from an event
+            message = (
+                get_action_message(event)
+                if isinstance(event, Action)
+                else get_observation_message(event)
+            )
+
+            # add regular message
+            if message:
+                messages.append(message)
+
+        # the latest user message is important:
+        # we want to remind the agent of the environment constraints
+        latest_user_message = next(
+            (m for m in reversed(messages) if m['role'] == 'user'), None
+        )
+
+        # add a reminder to the prompt
+        if latest_user_message:
+            latest_user_message['content'] += (
+                f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>'
+            )
+
+        return messages
diff --git a/agenthub/codeact_swe_agent/codeact_swe_agent.py b/agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -138,27 +138,14 @@ def step(self, state: State) -> Action:
         - MessageAction(content) - Message action to run (e.g. ask for clarification)
         - AgentFinishAction() - end the interaction
         """
-        messages: list[dict[str, str]] = [
-            {'role': 'system', 'content': self.system_message},
-            {'role': 'user', 'content': self.in_context_example},
-        ]
 
-        for prev_action, obs in state.history:
-            action_message = get_action_message(prev_action)
-            if action_message:
-                messages.append(action_message)
+        # if we're done, go back
+        latest_user_message = state.history.get_last_user_message()
+        if latest_user_message and latest_user_message.strip() == '/exit':
+            return AgentFinishAction()
 
-            obs_message = get_observation_message(obs)
-            if obs_message:
-                messages.append(obs_message)
-
-        latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
-        if latest_user_message:
-            if latest_user_message['content'].strip() == '/exit':
-                return AgentFinishAction()
-            latest_user_message['content'] += (
-                f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
-            )
+        # prepare what we want to send to the LLM
+        messages: list[dict[str, str]] = self._get_messages(state)
 
         response = self.llm.completion(
             messages=messages,
@@ -173,3 +160,35 @@ def step(self, state: State) -> Action:
 
     def search_memory(self, query: str) -> list[str]:
         raise NotImplementedError('Implement this abstract method')
+
+    def _get_messages(self, state: State) -> list[dict[str, str]]:
+        messages = [
+            {'role': 'system', 'content': self.system_message},
+            {'role': 'user', 'content': self.in_context_example},
+        ]
+
+        for event in state.history.get_events():
+            # create a regular message from an event
+            message = (
+                get_action_message(event)
+                if isinstance(event, Action)
+                else get_observation_message(event)
+            )
+
+            # add regular message
+            if message:
+                messages.append(message)
+
+        # the latest user message is important:
+        # we want to remind the agent of the environment constraints
+        latest_user_message = next(
+            (m for m in reversed(messages) if m['role'] == 'user'), None
+        )
+
+        # add a reminder to the prompt
+        if latest_user_message:
+            latest_user_message['content'] += (
+                f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
+            )
+
+        return messages
diff --git a/agenthub/delegator_agent/agent.py b/agenthub/delegator_agent/agent.py
@@ -41,7 +41,9 @@ def step(self, state: State) -> Action:
                 agent='StudyRepoForTaskAgent', inputs={'task': task}
             )
 
-        last_observation = state.history[-1][1]
+        # last observation in history should be from the delegate
+        last_observation = state.history.get_last_observation()
+
         if not isinstance(last_observation, AgentDelegateObservation):
             raise Exception('Last observation is not an AgentDelegateObservation')
 

diff --git a/agenthub/dummy_agent/agent.py b/agenthub/dummy_agent/agent.py
@@ -125,11 +125,16 @@ def step(self, state: State) -> Action:
         time.sleep(0.1)
         if state.iteration > 0:
             prev_step = self.steps[state.iteration - 1]
+
+            # a step is (action, observations list)
             if 'observations' in prev_step:
+                # one obs, at most
                 expected_observations = prev_step['observations']
-                hist_start = len(state.history) - len(expected_observations)
+
+                # check if the history matches the expected observations
+                hist_events = state.history.get_last_events(len(expected_observations))
                 for i in range(len(expected_observations)):
-                    hist_obs = event_to_dict(state.history[hist_start + i][1])
+                    hist_obs = event_to_dict(hist_events[i])
                     expected_obs = event_to_dict(expected_observations[i])
                     if (
                         'command_id' in hist_obs['extras']
@@ -143,9 +148,6 @@ def step(self, state: State) -> Action:
                     ):
                         del expected_obs['extras']['command_id']
                         expected_obs['content'] = ''
-                    if hist_obs != expected_obs:
-                        print('\nactual', hist_obs)
-                        print('\nexpect', expected_obs)
                     assert (
                         hist_obs == expected_obs
                     ), f'Expected observation {expected_obs}, got {hist_obs}'

diff --git a/agenthub/micro/agent.py b/agenthub/micro/agent.py
@@ -7,6 +7,7 @@
 from opendevin.events.serialization.action import action_from_dict
 from opendevin.events.serialization.event import event_to_memory
 from opendevin.llm.llm import LLM
+from opendevin.memory.history import ShortTermHistory
 
 from .instructions import instructions
 from .registry import all_microagents
@@ -27,18 +28,24 @@ def to_json(obj, **kwargs):
     return json.dumps(obj, **kwargs)
 
 
-def history_to_json(obj, **kwargs):
+def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
     """
     Serialize and simplify history to str format
     """
-    if isinstance(obj, list):
-        # process history, make it simpler.
-        processed_history = []
-        for action, observation in obj:
-            processed_history.append(
-                (event_to_memory(action), event_to_memory(observation))
-            )
-        return json.dumps(processed_history, **kwargs)
+
+    processed_history = []
+    event_count = 0
+
+    for event in history.get_events(reverse=True):
+        if event_count >= max_events:
+            break
+        processed_history.append(event_to_memory(event))
+        event_count += 1
+
+    # history is in reverse order, let's fix it
+    processed_history.reverse()
+
+    return json.dumps(processed_history, **kwargs)
 
 
 class MicroAgent(Agent):

diff --git a/agenthub/micro/coder/prompt.md b/agenthub/micro/coder/prompt.md
@@ -21,7 +21,7 @@ Do NOT finish until you have completed the tasks.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/agenthub/micro/commit_writer/prompt.md b/agenthub/micro/commit_writer/prompt.md
@@ -20,7 +20,7 @@ action with `outputs.answer` set to the answer.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 If the last item in the history is an error, you should try to fix it.
 

diff --git a/agenthub/micro/manager/prompt.md b/agenthub/micro/manager/prompt.md
@@ -27,7 +27,7 @@ you have delegated to, and why they failed).
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 If the last item in the history is an error, you should try to fix it. If you
 cannot fix it, call the `reject` action.

diff --git a/agenthub/micro/math_agent/prompt.md b/agenthub/micro/math_agent/prompt.md
@@ -10,7 +10,7 @@ and call the `finish` action with `outputs.answer` set to the answer.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 If the last item in the history is an error, you should try to fix it.
 

diff --git a/agenthub/micro/postgres_agent/prompt.md b/agenthub/micro/postgres_agent/prompt.md
@@ -18,7 +18,7 @@ You may take any of the following actions:
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/agenthub/micro/repo_explorer/prompt.md b/agenthub/micro/repo_explorer/prompt.md
@@ -20,7 +20,7 @@ When you're done, put your summary into the output of the `finish` action.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/agenthub/micro/study_repo_for_task/prompt.md b/agenthub/micro/study_repo_for_task/prompt.md
@@ -24,7 +24,7 @@ implement the solution. If the codebase is empty, you should call the `finish` a
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 ## Format
 {{ instructions.format.action }}

diff --git a/agenthub/micro/typo_fixer_agent/prompt.md b/agenthub/micro/typo_fixer_agent/prompt.md
@@ -31,7 +31,7 @@ Do NOT finish until you have fixed all the typos and generated a summary.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-5:]) }}
+{{ history_to_json(state.history, max_events=10) }}
 
 ## Format
 {{ instructions.format.action }}

diff --git a/agenthub/micro/verifier/prompt.md b/agenthub/micro/verifier/prompt.md
@@ -22,7 +22,7 @@ explaining what the problem is.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history[-10:]) }}
+{{ history_to_json(state.history, max_events=20) }}
 
 ## Format
 {{ instructions.format.action }}