diff --git a/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml b/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
new file mode 100644
index 0000000000000..78347f63fa793
--- /dev/null
+++ b/.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
@@ -0,0 +1,11 @@
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
+model_name: "neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.356
+  - name: "exact_match,flexible-extract"
+    value: 0.358
+limit: 1000
+num_fewshot: 5
diff --git a/.buildkite/lm-eval-harness/configs/models-small.txt b/.buildkite/lm-eval-harness/configs/models-small.txt
index 64a0f428587af..6057229ac50f3 100644
--- a/.buildkite/lm-eval-harness/configs/models-small.txt
+++ b/.buildkite/lm-eval-harness/configs/models-small.txt
@@ -1,6 +1,6 @@
 Meta-Llama-3-8B-Instruct.yaml
 Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
-Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
+Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
 Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
 Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
 Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
diff --git a/tests/lora/test_minicpmv.py b/tests/lora/test_minicpmv.py
index 81b8188e638c9..be040060d02b2 100644
--- a/tests/lora/test_minicpmv.py
+++ b/tests/lora/test_minicpmv.py
@@ -61,6 +61,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
         max_loras=4,
         max_lora_rank=64,
         trust_remote_code=True,
+        gpu_memory_utilization=0.97,  # This model is pretty big for CI GPUs
     )
     output1 = do_sample(llm, minicpmv_lora_files, lora_id=1)
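
For context on the new YAML file above: its first line records the baseline invocation (run-lm-eval-gsm-vllm-baseline.sh), and the "tasks" block lists the expected lm-eval metrics for the quantized model. Below is a minimal sketch of how such a config might be checked against measured results; the check_results() helper and the RTOL tolerance are illustrative assumptions, not the repository's actual test script.

# Sketch only: loads the expected metrics from a config YAML and asserts
# that measured lm-eval results fall within an assumed relative tolerance.
import yaml

RTOL = 0.05  # assumed relative tolerance for run-to-run metric drift


def check_results(config_path: str, measured: dict) -> None:
    """Assert measured task metrics match the expected values in the YAML."""
    with open(config_path) as f:
        config = yaml.safe_load(f)
    for task in config["tasks"]:
        for metric in task["metrics"]:
            expected = metric["value"]
            got = measured[task["name"]][metric["name"]]
            assert abs(got - expected) <= RTOL * expected, (
                f"{task['name']}/{metric['name']}: "
                f"expected {expected}, measured {got}")

Under these assumptions, a run that measured {"gsm8k": {"exact_match,strict-match": 0.36, "exact_match,flexible-extract": 0.36}} would pass against the expected values of 0.356 and 0.358 in the new config.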