From 208ce622c712fef75623f785597dbbd698700fa6 Mon Sep 17 00:00:00 2001
From: Roger Wang <136131678+ywang96@users.noreply.github.com>
Date: Fri, 8 Nov 2024 06:39:41 -0800
Subject: [PATCH] [V1]Enable APC by default only for text models (#10148)

Signed-off-by: Roger Wang
---
 vllm/v1/engine/llm_engine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
index cd3f5c75d0d14..81dc01ae2d8e7 100644
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -65,7 +65,10 @@ def __init__(
         elif usage_context == UsageContext.OPENAI_API_SERVER:
             scheduler_config.max_num_seqs = 1024
             scheduler_config.max_num_batched_tokens = 2048
-            cache_config.enable_prefix_caching = True
+
+        # TODO (ywang96): Enable APC by default when VLM supports it.
+        if not model_config.is_multimodal_model:
+            cache_config.enable_prefix_caching = True
 
         logger.info(
             "Initializing an LLM engine (v%s) with config: "
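
For illustration only (not part of the patch): a minimal, self-contained sketch of the guard this change introduces, using hypothetical stand-in ModelConfig/CacheConfig classes rather than vLLM's real config objects. It shows the intended behavior, under those assumptions: automatic prefix caching (APC) is enabled by default only when the model is not multimodal.

from dataclasses import dataclass


@dataclass
class ModelConfig:  # hypothetical stand-in for vLLM's model config
    is_multimodal_model: bool


@dataclass
class CacheConfig:  # hypothetical stand-in for vLLM's cache config
    enable_prefix_caching: bool = False


def maybe_enable_apc(model_config: ModelConfig,
                     cache_config: CacheConfig) -> None:
    # Same guard as the patch: APC stays off for multimodal (VLM) models
    # until prefix caching is supported for them.
    if not model_config.is_multimodal_model:
        cache_config.enable_prefix_caching = True


# Text-only model: APC is switched on by default.
text_cache = CacheConfig()
maybe_enable_apc(ModelConfig(is_multimodal_model=False), text_cache)
assert text_cache.enable_prefix_caching

# Multimodal model: the existing default is left untouched.
vlm_cache = CacheConfig()
maybe_enable_apc(ModelConfig(is_multimodal_model=True), vlm_cache)
assert not vlm_cache.enable_prefix_caching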