Customize LLM config per agent (#2756)
Currently, OpenDevin uses a global singleton LLM config and a global singleton agent config. This PR allows users to configure an LLM config for each agent. A hypothetically useful scenario is to use a cheaper LLM for repo exploration / code search, and a more powerful LLM to do the actual problem solving (CodeActAgent).

Partially solves #2075 (web GUI improvements are not the goal of this PR)
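As a sketch of what this enables (mirroring the example added to `config.template.toml` in this PR; `RepoExplorerAgent` is only an illustrative agent name), a `config.toml` might look like:

```toml
[core]
default_agent = "CodeActAgent"

# Default LLM config group, used by agents without an explicit mapping
[llm]
model = "gpt-4o"
api_key = "your-api-key"

# A cheaper LLM config group
[llm.gpt3]
model = "gpt-3.5"
api_key = "your-api-key"

# Route a token-hungry but quality-insensitive agent to the cheaper group
[agent.RepoExplorerAgent]
llm_config = 'gpt3'
```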
li-boxuan authored Jul 10, 2024
1 parent 23e2d01 commit c68478f
Showing 35 changed files with 522 additions and 227 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dummy-agent-test.yml
@@ -31,7 +31,7 @@ jobs:
- name: Run tests
run: |
set -e
-poetry run python opendevin/core/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
+poetry run python opendevin/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
- name: Check exit code
run: |
if [ $? -ne 0 ]; then
12 changes: 9 additions & 3 deletions agenthub/codeact_agent/codeact_agent.py
@@ -8,6 +8,7 @@
)
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
+from opendevin.core.config import config
from opendevin.events.action import (
Action,
AgentDelegateAction,
@@ -60,8 +61,11 @@ def get_action_message(action: Action) -> dict[str, str] | None:


def get_observation_message(obs) -> dict[str, str] | None:
+max_message_chars = config.get_llm_config_from_agent(
+'CodeActAgent'
+).max_message_chars
if isinstance(obs, CmdOutputObservation):
-content = 'OBSERVATION:\n' + truncate_content(obs.content)
+content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
content += (
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
)
@@ -76,10 +80,12 @@ def get_observation_message(obs) -> dict[str, str] | None:
'![image](data:image/png;base64, ...) already displayed to user'
)
content = '\n'.join(splitted)
-content = truncate_content(content)
+content = truncate_content(content, max_message_chars)
return {'role': 'user', 'content': content}
elif isinstance(obs, AgentDelegateObservation):
-content = 'OBSERVATION:\n' + truncate_content(str(obs.outputs))
+content = 'OBSERVATION:\n' + truncate_content(
+str(obs.outputs), max_message_chars
+)
return {'role': 'user', 'content': content}
return None

8 changes: 6 additions & 2 deletions agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -7,6 +7,7 @@
from agenthub.codeact_swe_agent.response_parser import CodeActSWEResponseParser
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
+from opendevin.core.config import config
from opendevin.events.action import (
Action,
AgentFinishAction,
@@ -52,8 +53,11 @@ def get_action_message(action: Action) -> dict[str, str] | None:


def get_observation_message(obs) -> dict[str, str] | None:
+max_message_chars = config.get_llm_config_from_agent(
+'CodeActSWEAgent'
+).max_message_chars
if isinstance(obs, CmdOutputObservation):
-content = 'OBSERVATION:\n' + truncate_content(obs.content)
+content = 'OBSERVATION:\n' + truncate_content(obs.content, max_message_chars)
content += (
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
)
@@ -68,7 +72,7 @@ def get_observation_message(obs) -> dict[str, str] | None:
'![image](data:image/png;base64, ...) already displayed to user'
)
content = '\n'.join(splitted)
-content = truncate_content(content)
+content = truncate_content(content, max_message_chars)
return {'role': 'user', 'content': content}
return None

6 changes: 5 additions & 1 deletion agenthub/micro/agent.py
@@ -2,6 +2,7 @@

from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
+from opendevin.core.config import config
from opendevin.core.utils import json
from opendevin.events.action import Action
from opendevin.events.serialization.action import action_from_dict
@@ -32,14 +33,17 @@ def history_to_json(history: ShortTermHistory, max_events=20, **kwargs):
"""
Serialize and simplify history to str format
"""
+# TODO: get agent specific llm config
+llm_config = config.get_llm_config()
+max_message_chars = llm_config.max_message_chars

processed_history = []
event_count = 0

for event in history.get_events(reverse=True):
if event_count >= max_events:
break
-processed_history.append(event_to_memory(event))
+processed_history.append(event_to_memory(event, max_message_chars))
event_count += 1

# history is in reverse order, let's fix it
25 changes: 17 additions & 8 deletions agenthub/monologue_agent/agent.py
@@ -29,7 +29,7 @@
from opendevin.memory.condenser import MemoryCondenser
from opendevin.runtime.tools import RuntimeTool

-if config.agent.memory_enabled:
+if config.get_agent_config('MonologueAgent').memory_enabled:
from opendevin.memory.memory import LongTermMemory


@@ -78,7 +78,7 @@ def _initialize(self, task: str):
raise AgentNoInstructionError()

self.initial_thoughts = []
-if config.agent.memory_enabled:
+if config.get_agent_config('MonologueAgent').memory_enabled:
self.memory = LongTermMemory()
else:
self.memory = None
@@ -89,6 +89,9 @@ def _initialize(self, task: str):
self._initialized = True

def _add_initial_thoughts(self, task):
+max_message_chars = config.get_llm_config_from_agent(
+'MonologueAgent'
+).max_message_chars
previous_action = ''
for thought in INITIAL_THOUGHTS:
thought = thought.replace('$TASK', task)
@@ -106,7 +109,9 @@ def _add_initial_thoughts(self, task):
observation = BrowserOutputObservation(
content=thought, url='', screenshot=''
)
-self.initial_thoughts.append(event_to_memory(observation))
+self.initial_thoughts.append(
+event_to_memory(observation, max_message_chars)
+)
previous_action = ''
else:
action: Action = NullAction()
@@ -133,7 +138,7 @@ def _add_initial_thoughts(self, task):
previous_action = ActionType.BROWSE
else:
action = MessageAction(thought)
-self.initial_thoughts.append(event_to_memory(action))
+self.initial_thoughts.append(event_to_memory(action, max_message_chars))

def step(self, state: State) -> Action:
"""
@@ -145,15 +150,17 @@ def step(self, state: State) -> Action:
Returns:
- Action: The next action to take based on LLM response
"""

max_message_chars = config.get_llm_config_from_agent(
'MonologueAgent'
).max_message_chars
goal = state.get_current_user_intent()
self._initialize(goal)

recent_events: list[dict[str, str]] = []

# add the events from state.history
for event in state.history.get_events():
-recent_events.append(event_to_memory(event))
+recent_events.append(event_to_memory(event, max_message_chars))

# add the last messages to long term memory
if self.memory is not None:
@@ -163,9 +170,11 @@
# this should still work
# we will need to do this differently: find out if there really is an action or an observation in this step
if last_action:
-self.memory.add_event(event_to_memory(last_action))
+self.memory.add_event(event_to_memory(last_action, max_message_chars))
if last_observation:
-self.memory.add_event(event_to_memory(last_observation))
+self.memory.add_event(
+event_to_memory(last_observation, max_message_chars)
+)

# the action prompt with initial thoughts and recent events
prompt = prompts.get_request_action_prompt(
8 changes: 6 additions & 2 deletions agenthub/planner_agent/prompt.py
@@ -1,4 +1,5 @@
from opendevin.controller.state.state import State
+from opendevin.core.config import config
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.schema import ActionType
from opendevin.core.utils import json
@@ -128,6 +129,9 @@ def get_prompt(state: State) -> str:
Returns:
- str: The formatted string prompt with historical values
"""
+max_message_chars = config.get_llm_config_from_agent(
+'PlannerAgent'
+).max_message_chars

# the plan
plan_str = json.dumps(state.root_task.to_dict(), indent=2)
@@ -142,7 +146,7 @@
break
if latest_action == NullAction() and isinstance(event, Action):
latest_action = event
-history_dicts.append(event_to_memory(event))
+history_dicts.append(event_to_memory(event, max_message_chars))

# history_dicts is in reverse order, lets fix it
history_dicts.reverse()
@@ -160,7 +164,7 @@
plan_status = "You're not currently working on any tasks. Your next action MUST be to mark a task as in_progress."

# the hint, based on the last action
-hint = get_hint(event_to_memory(latest_action).get('action', ''))
+hint = get_hint(event_to_memory(latest_action, max_message_chars).get('action', ''))
logger.info('HINT:\n' + hint, extra={'msg_type': 'DETAIL'})

# the last relevant user message (the task)
27 changes: 23 additions & 4 deletions config.template.toml
@@ -79,8 +79,12 @@ persist_sandbox = false
# Use host network
#use_host_network = false

+# Name of the default agent
+#default_agent = "CodeActAgent"
+
#################################### LLM #####################################
-# Configuration for the LLM model
+# Configuration for LLM models (group name starts with 'llm')
+# use 'llm' for the default LLM config
##############################################################################
[llm]
# AWS access key ID
@@ -149,8 +153,18 @@ model = "gpt-4o"
# Top p for the API
#top_p = 0.5

+[llm.gpt3]
+# API key to use
+api_key = "your-api-key"
+
+# Model to use
+model = "gpt-3.5"
+
#################################### Agent ###################################
-# Configuration for the agent
+# Configuration for agents (group name starts with 'agent')
+# Use 'agent' for the default agent config
+# otherwise, group name must be `agent.<agent_name>` (case-sensitive), e.g.
+# agent.CodeActAgent
##############################################################################
[agent]
# Memory enabled
@@ -159,8 +173,13 @@ model = "gpt-4o"
# Memory maximum threads
#memory_max_threads = 2

-# Name of the agent
-#name = "CodeActAgent"
+# LLM config group to use
+#llm_config = 'llm'
+
+[agent.RepoExplorerAgent]
+# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
+# useful when an agent doesn't demand high quality but uses a lot of tokens
+llm_config = 'gpt3'

#################################### Sandbox ###################################
# Configuration for the sandbox
75 changes: 75 additions & 0 deletions docs/modules/usage/changelog.md
@@ -0,0 +1,75 @@
---
sidebar_position: 8
---

# Changelog

## 0.8 (release date: ??)

### Config breaking changes

In this release we introduced a few breaking changes to backend configurations.
If you have only been using OpenDevin via the frontend (web GUI), no action is
needed on your part.

Here's a list of the breaking config changes. They only apply to users who run
the OpenDevin CLI via `main.py`. For more details, see [#2756](https://github.com/OpenDevin/OpenDevin/pull/2756).

#### Removal of --model-name option from main.py

Please note that the `--model-name` (`-m`) option no longer exists. You should set up
LLM configs in `config.toml` or via environment variables.
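As a migration sketch (the task string and model name are placeholders), the old and new invocations compare roughly like this:

```bash
# Before 0.8: the model was selected on the command line
poetry run python opendevin/core/main.py -t "do a flip" -c CodeActAgent -m gpt-4o

# 0.8 and later: define the model in a [llm] group in config.toml,
# then optionally select a named group with -l
poetry run python opendevin/core/main.py -t "do a flip" -c CodeActAgent -l llm
```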

#### LLM config groups must be subgroups of 'llm'

Prior to release 0.8, you could use an arbitrary name for an LLM config group in `config.toml`, e.g.

```toml
[gpt-4o]
model="gpt-4o"
api_key="<your_api_key>"
```

and then use the `--llm-config` CLI argument to specify the desired LLM config group
by name. This no longer works. Instead, the config group must be a subgroup of `llm`,
e.g.:

```toml
[llm.gpt-4o]
model="gpt-4o"
api_key="<your_api_key>"
```

If you already have a config group named `llm`, it needs no change: it will be used
as the default LLM config group.

#### 'agent' group no longer contains 'name' field

Prior to release 0.8, you may or may not have a config group named `agent` that
looks like this:

```toml
[agent]
name="CodeActAgent"
memory_max_threads=2
```

Note that the `name` field has been removed. Instead, you should put a `default_agent` field
under the `core` group, e.g.

```toml
[core]
# other configs
default_agent='CodeActAgent'

[agent]
llm_config='llm'
memory_max_threads=2

[agent.CodeActAgent]
llm_config='gpt-4o'
```

Note that, similar to `llm` subgroups, you can also define `agent` subgroups.
Moreover, an agent can be associated with a specific LLM config group. For more
details, see the examples in `config.template.toml`.
4 changes: 2 additions & 2 deletions evaluation/TUTORIAL.md
@@ -53,14 +53,14 @@ api_key = "sk-XXX"

In this section, for the purpose of building an evaluation task, we don't use the standard OpenDevin web-based GUI, but rather run the OpenDevin backend from the CLI.

-For example, you can run the following, which performs the specified task `-t`, with a particular model `-m` and agent `-c`, for a maximum number of iterations `-i`:
+For example, you can run the following, which performs the specified task `-t`, with a particular model config `-l` and agent `-c`, for a maximum number of iterations `-i`:

```bash
poetry run python ./opendevin/core/main.py \
-i 10 \
-t "Write me a bash script that print hello world." \
-c CodeActAgent \
--m gpt-4o-2024-05-13
+-l llm
```

After running the script, you will observe the following:
4 changes: 2 additions & 2 deletions evaluation/agent_bench/README.md
@@ -29,12 +29,12 @@ enable_auto_lint = true
box_type = "ssh"
timeout = 120

-[eval_gpt35_turbo]
+[llm.eval_gpt35_turbo]
model = "gpt-3.5-turbo"
api_key = "sk-123"
temperature = 0.0

-[eval_gpt4o]
+[llm.eval_gpt4o]
model = "gpt-4o"
api_key = "sk-123"
temperature = 0.0
4 changes: 2 additions & 2 deletions evaluation/bird/README.md
@@ -21,12 +21,12 @@ ssh_hostname = "localhost"
enable_auto_lint = true

# TODO: Change these to the model you want to evaluate
-[eval_gpt4_1106_preview]
+[llm.eval_gpt4_1106_preview]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0

-[eval_some_openai_compatible_model]
+[llm.eval_some_openai_compatible_model]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"