Skip to content

Commit

Permalink
Refactoring: event stream based agent history (#2709)
Browse files Browse the repository at this point in the history
* add to event stream sync

* remove async from tests

* small logging spam fix

* remove swe agent

* arch refactoring: use history from the event stream

* refactor agents

* monologue agent

* ruff

* planner agent

* micro-agents

* refactor history in evaluations

* evals history refactoring

* adapt evals and tests

* unit testing stuck

* testing micro agents, event stream

* fix planner agent

* fix tests

* fix stuck after rename

* fix test

* small clean up

* fix merge

* fix merge issue

* fix integration tests

* Update agenthub/dummy_agent/agent.py

* fix tests

* rename more clearly; add todo; clean up
  • Loading branch information
enyst authored Jul 7, 2024
1 parent 9dc2d2c commit d37b297
Show file tree
Hide file tree
Showing 107 changed files with 1,670 additions and 676 deletions.
25 changes: 12 additions & 13 deletions agenthub/browsing_agent/browsing_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from opendevin.events.event import EventSource
from opendevin.events.observation import BrowserOutputObservation
from opendevin.events.observation.observation import Observation
from opendevin.llm.llm import LLM
from opendevin.runtime.plugins import (
PluginRequirement,
Expand Down Expand Up @@ -146,23 +147,21 @@ def step(self, state: State) -> Action:
last_obs = None
last_action = None

if EVAL_MODE and len(state.history) == 1:
if EVAL_MODE and len(state.history.get_events_as_list()) == 1:
# for webarena and miniwob++ eval, we need to retrieve the initial observation already in browser env
# initialize and retrieve the first observation by issuing a noop action
# For non-benchmark browsing, the browser env starts with a blank page, and the agent is expected to first navigate to desired websites
return BrowseInteractiveAction(browser_actions='noop()')

for prev_action, obs in state.history:
if isinstance(prev_action, BrowseInteractiveAction):
prev_actions.append(prev_action.browser_actions)
last_obs = obs
last_action = prev_action
elif (
isinstance(prev_action, MessageAction)
and prev_action.source == EventSource.AGENT
):
# agent has responded, task finish.
return AgentFinishAction(outputs={'content': prev_action.content})
for event in state.history.get_events():
if isinstance(event, BrowseInteractiveAction):
prev_actions.append(event.browser_actions)
last_action = event
elif isinstance(event, MessageAction) and event.source == EventSource.AGENT:
# agent has responded, task finished.
return AgentFinishAction(outputs={'content': event.content})
elif isinstance(event, Observation):
last_obs = event

if EVAL_MODE:
prev_actions = prev_actions[1:] # remove the first noop action
Expand Down Expand Up @@ -207,7 +206,7 @@ def step(self, state: State) -> Action:

prompt = get_prompt(error_prefix, cur_axtree_txt, prev_action_str)
messages.append({'role': 'user', 'content': prompt})
logger.info(prompt)
logger.debug(prompt)
response = self.llm.completion(
messages=messages,
temperature=0.0,
Expand Down
57 changes: 38 additions & 19 deletions agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,27 +182,14 @@ def step(self, state: State) -> Action:
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
messages: list[dict[str, str]] = [
{'role': 'system', 'content': self.system_message},
{'role': 'user', 'content': self.in_context_example},
]

for prev_action, obs in state.history:
action_message = get_action_message(prev_action)
if action_message:
messages.append(action_message)
# if the latest user message is '/exit', finish right away
latest_user_message = state.history.get_last_user_message()
if latest_user_message and latest_user_message.strip() == '/exit':
return AgentFinishAction()

obs_message = get_observation_message(obs)
if obs_message:
messages.append(obs_message)

latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
if latest_user_message:
if latest_user_message['content'].strip() == '/exit':
return AgentFinishAction()
latest_user_message['content'] += (
f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
)
# prepare what we want to send to the LLM
messages: list[dict[str, str]] = self._get_messages(state)

response = self.llm.completion(
messages=messages,
Expand All @@ -217,3 +204,35 @@ def step(self, state: State) -> Action:

def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')

def _get_messages(self, state: State) -> list[dict[str, str]]:
    """
    Assemble the list of chat messages to send to the LLM.

    The list opens with the system message and the in-context example,
    followed by one message per event in the state's history (events whose
    conversion yields nothing are left out). The most recent message with
    the 'user' role then gets an environment reminder appended, stating how
    many turns remain.

    Parameters:
    - state (State): provides the event history, max_iterations and iteration

    Returns:
    - list of {'role': ..., 'content': ...} dicts
    """
    prompt_messages = [
        {'role': 'system', 'content': self.system_message},
        {'role': 'user', 'content': self.in_context_example},
    ]

    for event in state.history.get_events():
        # actions and observations each have their own converter
        if isinstance(event, Action):
            converted = get_action_message(event)
        else:
            converted = get_observation_message(event)

        # the converters may return nothing for events that are not shown
        if not converted:
            continue
        prompt_messages.append(converted)

    # walk backwards to reach the latest user message, and remind the agent
    # of the environment constraints there
    for candidate in reversed(prompt_messages):
        if candidate['role'] != 'user':
            continue
        candidate['content'] += (
            f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>'
        )
        break

    return prompt_messages
57 changes: 38 additions & 19 deletions agenthub/codeact_swe_agent/codeact_swe_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,27 +138,14 @@ def step(self, state: State) -> Action:
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
messages: list[dict[str, str]] = [
{'role': 'system', 'content': self.system_message},
{'role': 'user', 'content': self.in_context_example},
]

for prev_action, obs in state.history:
action_message = get_action_message(prev_action)
if action_message:
messages.append(action_message)
# if the latest user message is '/exit', finish right away
latest_user_message = state.history.get_last_user_message()
if latest_user_message and latest_user_message.strip() == '/exit':
return AgentFinishAction()

obs_message = get_observation_message(obs)
if obs_message:
messages.append(obs_message)

latest_user_message = [m for m in messages if m['role'] == 'user'][-1]
if latest_user_message:
if latest_user_message['content'].strip() == '/exit':
return AgentFinishAction()
latest_user_message['content'] += (
f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
)
# prepare what we want to send to the LLM
messages: list[dict[str, str]] = self._get_messages(state)

response = self.llm.completion(
messages=messages,
Expand All @@ -173,3 +160,35 @@ def step(self, state: State) -> Action:

def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')

def _get_messages(self, state: State) -> list[dict[str, str]]:
    """
    Assemble the list of chat messages to send to the LLM.

    The list opens with the system message and the in-context example,
    followed by one message per event in the state's history (events whose
    conversion yields nothing are left out). The most recent message with
    the 'user' role then gets an environment reminder appended, stating how
    many turns remain.

    Parameters:
    - state (State): provides the event history, max_iterations and iteration

    Returns:
    - list of {'role': ..., 'content': ...} dicts
    """
    prompt_messages = [
        {'role': 'system', 'content': self.system_message},
        {'role': 'user', 'content': self.in_context_example},
    ]

    for event in state.history.get_events():
        # actions and observations each have their own converter
        if isinstance(event, Action):
            converted = get_action_message(event)
        else:
            converted = get_observation_message(event)

        # the converters may return nothing for events that are not shown
        if not converted:
            continue
        prompt_messages.append(converted)

    # walk backwards to reach the latest user message, and remind the agent
    # of the environment constraints there
    for candidate in reversed(prompt_messages):
        if candidate['role'] != 'user':
            continue
        candidate['content'] += (
            f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task.'
        )
        break

    return prompt_messages
4 changes: 3 additions & 1 deletion agenthub/delegator_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ def step(self, state: State) -> Action:
agent='StudyRepoForTaskAgent', inputs={'task': task}
)

last_observation = state.history[-1][1]
# last observation in history should be from the delegate
last_observation = state.history.get_last_observation()

if not isinstance(last_observation, AgentDelegateObservation):
raise Exception('Last observation is not an AgentDelegateObservation')

Expand Down
12 changes: 7 additions & 5 deletions agenthub/dummy_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,16 @@ def step(self, state: State) -> Action:
time.sleep(0.1)
if state.iteration > 0:
prev_step = self.steps[state.iteration - 1]

# a step is (action, observations list)
if 'observations' in prev_step:
# one obs, at most
expected_observations = prev_step['observations']
hist_start = len(state.history) - len(expected_observations)

# check if the history matches the expected observations
hist_events = state.history.get_last_events(len(expected_observations))
for i in range(len(expected_observations)):
hist_obs = event_to_dict(state.history[hist_start + i][1])
hist_obs = event_to_dict(hist_events[i])
expected_obs = event_to_dict(expected_observations[i])
if (
'command_id' in hist_obs['extras']
Expand All @@ -143,9 +148,6 @@ def step(self, state: State) -> Action:
):
del expected_obs['extras']['command_id']
expected_obs['content'] = ''
if hist_obs != expected_obs:
print('\nactual', hist_obs)
print('\nexpect', expected_obs)
assert (
hist_obs == expected_obs
), f'Expected observation {expected_obs}, got {hist_obs}'
Expand Down
25 changes: 16 additions & 9 deletions agenthub/micro/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from opendevin.events.serialization.action import action_from_dict
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM
from opendevin.memory.history import ShortTermHistory

from .instructions import instructions
from .registry import all_microagents
Expand All @@ -27,18 +28,24 @@ def to_json(obj, **kwargs):
return json.dumps(obj, **kwargs)


def history_to_json(obj, **kwargs):
def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
"""
Serialize and simplify history to str format
"""
if isinstance(obj, list):
# process history, make it simpler.
processed_history = []
for action, observation in obj:
processed_history.append(
(event_to_memory(action), event_to_memory(observation))
)
return json.dumps(processed_history, **kwargs)

processed_history = []
event_count = 0

for event in history.get_events(reverse=True):
if event_count >= max_events:
break
processed_history.append(event_to_memory(event))
event_count += 1

# history is in reverse order, let's fix it
processed_history.reverse()

return json.dumps(processed_history, **kwargs)


class MicroAgent(Agent):
Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/coder/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Do NOT finish until you have completed the tasks.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
2 changes: 1 addition & 1 deletion agenthub/micro/commit_writer/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ action with `outputs.answer` set to the answer.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

If the last item in the history is an error, you should try to fix it.

Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/manager/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ you have delegated to, and why they failed).

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

If the last item in the history is an error, you should try to fix it. If you
cannot fix it, call the `reject` action.
Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/math_agent/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ and call the `finish` action with `outputs.answer` set to the answer.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

If the last item in the history is an error, you should try to fix it.

Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/postgres_agent/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ You may take any of the following actions:

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
2 changes: 1 addition & 1 deletion agenthub/micro/repo_explorer/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ When you're done, put your summary into the output of the `finish` action.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
2 changes: 1 addition & 1 deletion agenthub/micro/study_repo_for_task/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ implement the solution. If the codebase is empty, you should call the `finish` a

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/typo_fixer_agent/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Do NOT finish until you have fixed all the typos and generated a summary.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-5:]) }}
{{ history_to_json(state.history, max_events=10) }}

## Format
{{ instructions.format.action }}
Expand Down
2 changes: 1 addition & 1 deletion agenthub/micro/verifier/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ explaining what the problem is.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history[-10:]) }}
{{ history_to_json(state.history, max_events=20) }}

## Format
{{ instructions.format.action }}
Loading

0 comments on commit d37b297

Please sign in to comment.