[Frontend] Pythonic tool parser #9859

Merged (11 commits) on Nov 14, 2024
6 changes: 4 additions & 2 deletions docs/source/serving/openai_compatible_server.md
@@ -329,13 +329,15 @@ Limitations:
* Llama's smaller models struggle to use tools effectively.

Example supported models:
-* `meta-llama/Llama-3.2-1B-Instruct` (use with `examples/tool_chat_template_llama3.2_pythonic.jinja`)
-* `meta-llama/Llama-3.2-3B-Instruct` (use with `examples/tool_chat_template_llama3.2_pythonic.jinja`)
+* `meta-llama/Llama-3.2-1B-Instruct`\* (use with `examples/tool_chat_template_llama3.2_pythonic.jinja`)
+* `meta-llama/Llama-3.2-3B-Instruct`\* (use with `examples/tool_chat_template_llama3.2_pythonic.jinja`)
* `Team-ACE/ToolACE-8B` (use with `examples/tool_chat_template_toolace.jinja`)
* `fixie-ai/ultravox-v0_4-ToolACE-8B` (use with `examples/tool_chat_template_toolace.jinja`)

Flags: `--tool-call-parser pythonic --chat-template {see_above}`

+\* Llama's smaller models frequently fail to emit tool calls in the correct format. Your mileage may vary.
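For illustration, "pythonic" here means the model emits its tool calls as a plain-text Python list of function calls, which this parser turns into OpenAI-style tool calls. A minimal sketch of that format and one way to decode it (the tool names and the `ast`-based decoding below are illustrative assumptions, not the parser's actual code):

```python
import ast

# Hypothetical model output in the pythonic tool-call format: a plain-text
# Python list of function calls with keyword arguments (tool names are made up).
model_output = '[get_weather(city="San Francisco", unit="celsius"), get_time(tz="PST")]'

# One way such output can be decoded: parse it as a Python expression and read
# the call names and keyword arguments from the AST.
calls = ast.parse(model_output, mode="eval").body  # an ast.List of ast.Call nodes
for call in calls.elts:
    name = call.func.id
    arguments = {kw.arg: ast.literal_eval(kw.value) for kw in call.keywords}
    print(name, arguments)
# get_weather {'city': 'San Francisco', 'unit': 'celsius'}
# get_time {'tz': 'PST'}
```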


### How to write a tool parser plugin

11 changes: 0 additions & 11 deletions tests/tool_use/utils.py
@@ -123,17 +123,6 @@ def ensure_system_prompt(messages: List[Dict[str, Any]],
"supports_parallel":
False,
},
"llama3.2_pythonic": {
"model":
"meta-llama/Llama-3.2-3B-Instruct",
"arguments": [
"--tool-call-parser", "pythonic", "--chat-template",
str(VLLM_PATH /
"examples/tool_chat_template_llama3.2_pythonic.jinja")
],
"supports_parallel":
True,
},
"toolACE": {
"model":
"Team-ACE/ToolACE-8B",
5 changes: 5 additions & 0 deletions vllm/entrypoints/openai/serving_chat.py
@@ -75,6 +75,11 @@ def __init__(self,
            try:
                self.tool_parser = ToolParserManager.get_tool_parser(
                    tool_parser)
+                if (self.tool_parser.__name__ == "PythonicToolParser" and
+                        model_config.model.startswith("meta-llama/Llama-3.2")):
+                    logger.warning(
+                        "Llama3.2 models may struggle to emit valid pythonic"
+                        " tool calls")
            except Exception as e:
                raise TypeError("Error: --enable-auto-tool-choice requires "
                                f"tool_parser:'{tool_parser}' which has not "
6 changes: 6 additions & 0 deletions vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
@@ -29,6 +29,12 @@ class PythonicToolParser(ToolParser):

    Used when --enable-auto-tool-choice --tool-call-parser pythonic are all set
    """
+    # TODO(mdepinet): Possible future improvements:
+    # 1. Support text + tools separated by either <|python_tag|> or \n\n
+    # 2. Support tools outside of a list (or separated by a semicolon).
+    #    This depends on item 1 for consistent streaming.
+    # Neither of these are necessary for e.g. ToolACE, but both would help make
+    # Llama3.2 models more reliable.

    TOOL_CALL_REGEX = re.compile(
        r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]",
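To make the regex above concrete, here is a small standalone sketch of the kind of output it accepts and rejects. The tool-call strings are hypothetical, and the flag argument to `re.compile` is truncated in the diff, so plain compilation is assumed:

```python
import re

# Same pattern as TOOL_CALL_REGEX in the hunk above; any flags passed to
# re.compile are cut off in the diff, so none are assumed here.
TOOL_CALL_REGEX = re.compile(
    r"\[([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s)?\),\s*)*([a-zA-Z]+\w*\(([a-zA-Z]+\w*=.*,\s*)*([a-zA-Z]+\w*=.*\s*)?\)\s*)+\]")

# A bracketed list of Python-style calls matches (tool names are hypothetical):
print(bool(TOOL_CALL_REGEX.match('[get_weather(city="SF"), get_time(tz="UTC")]')))  # True

# Free-form text before the list does not match, which is what TODO item 1 above is about:
print(bool(TOOL_CALL_REGEX.match('Sure! [get_weather(city="SF")]')))  # False
```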