diff --git a/doc/source/getting_started/installation.rst b/doc/source/getting_started/installation.rst
index 40fbf2a823..c6562746c3 100644
--- a/doc/source/getting_started/installation.rst
+++ b/doc/source/getting_started/installation.rst
@@ -42,7 +42,7 @@ Currently, supported models include:
 - ``llama-2``, ``llama-3``, ``llama-2-chat``, ``llama-3-instruct``
 - ``baichuan``, ``baichuan-chat``, ``baichuan-2-chat``
 - ``internlm-16k``, ``internlm-chat-7b``, ``internlm-chat-8k``, ``internlm-chat-20b``
-- ``mistral-v0.1``, ``mistral-instruct-v0.1``, ``mistral-instruct-v0.2``, ``mistral-instruct-v0.3``
+- ``mistral-v0.1``, ``mistral-instruct-v0.1``, ``mistral-instruct-v0.2``, ``mistral-instruct-v0.3``, ``mistral-nemo-instruct``
 - ``codestral-v0.1``
 - ``Yi``, ``Yi-1.5``, ``Yi-chat``, ``Yi-1.5-chat``, ``Yi-1.5-chat-16k``
 - ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
diff --git a/doc/source/models/builtin/llm/index.rst b/doc/source/models/builtin/llm/index.rst
index 2c25cd243b..f3673f7172 100644
--- a/doc/source/models/builtin/llm/index.rst
+++ b/doc/source/models/builtin/llm/index.rst
@@ -326,6 +326,11 @@ The following is a list of built-in LLM in Xinference:
     - 32768
     - The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.
 
+  * - :ref:`mistral-nemo-instruct <models_llm_mistral-nemo-instruct>`
+    - chat
+    - 1024000
+    - The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407
+
   * - :ref:`mistral-v0.1 <models_llm_mistral-v0.1>`
     - generate
     - 8192
@@ -695,6 +700,8 @@ The following is a list of built-in LLM in Xinference:
 
    mistral-instruct-v0.3
 
+   mistral-nemo-instruct
+
    mistral-v0.1
 
    mixtral-8x22b-instruct-v0.1
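The index entry above records the family's ability (``chat``) and its 1024000-token context length. Beyond the ``xinference launch`` commands documented in the new model page below, a launched model can also be driven from the Python client. A minimal sketch, assuming a local Xinference server at the default endpoint and the client API of this release line (newer clients take ``messages=`` instead of ``prompt=``); adjust engine, format, and quantization to your setup::

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")

    # Launch the built-in family added by this patch; the keyword arguments
    # mirror the `xinference launch` flags shown in the model page below.
    uid = client.launch_model(
        model_name="mistral-nemo-instruct",
        model_engine="transformers",
        model_format="pytorch",
        model_size_in_billions=12,
        quantization="none",
    )

    model = client.get_model(uid)
    print(model.chat(prompt="Summarize Mistral Nemo in one sentence."))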
diff --git a/doc/source/models/builtin/llm/mistral-nemo-instruct.rst b/doc/source/models/builtin/llm/mistral-nemo-instruct.rst
new file mode 100644
index 0000000000..d863e09304
--- /dev/null
+++ b/doc/source/models/builtin/llm/mistral-nemo-instruct.rst
@@ -0,0 +1,159 @@
+.. _models_llm_mistral-nemo-instruct:
+
+========================================
+mistral-nemo-instruct
+========================================
+
+- **Context Length:** 1024000
+- **Model Name:** mistral-nemo-instruct
+- **Languages:** en, fr, de, es, it, pt, zh, ru, ja
+- **Abilities:** chat
+- **Description:** The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407
+
+Specifications
+^^^^^^^^^^^^^^
+
+
+Model Spec 1 (pytorch, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 12
+- **Quantizations:** none
+- **Engines**: vLLM, Transformers
+- **Model ID:** mistralai/Mistral-Nemo-Instruct-2407
+- **Model Hubs**: `Hugging Face <https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407>`__, `ModelScope <https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Instruct-2407>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format pytorch --quantization ${quantization}
+
+
+Model Spec 2 (pytorch, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 12
+- **Quantizations:** 4-bit
+- **Engines**: Transformers
+- **Model ID:** unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit
+- **Model Hubs**: `Hugging Face <https://huggingface.co/unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit>`__, `ModelScope `__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format pytorch --quantization ${quantization}
+
+
+Model Spec 3 (pytorch, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** pytorch
+- **Model Size (in billions):** 12
+- **Quantizations:** 8-bit
+- **Engines**: Transformers
+- **Model ID:** afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit
+- **Model Hubs**: `Hugging Face <https://huggingface.co/afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit>`__, `ModelScope `__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format pytorch --quantization ${quantization}
+
+
+Model Spec 4 (gptq, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** gptq
+- **Model Size (in billions):** 12
+- **Quantizations:** Int4
+- **Engines**: vLLM, Transformers
+- **Model ID:** ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit
+- **Model Hubs**: `Hugging Face <https://huggingface.co/ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit>`__, `ModelScope <https://modelscope.cn/models/LLM-Research/Mistral-Nemo-Instruct-2407-gptq-4bit>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format gptq --quantization ${quantization}
+
+
+Model Spec 5 (awq, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** awq
+- **Model Size (in billions):** 12
+- **Quantizations:** Int4
+- **Engines**: vLLM, Transformers
+- **Model ID:** casperhansen/mistral-nemo-instruct-2407-awq
+- **Model Hubs**: `Hugging Face <https://huggingface.co/casperhansen/mistral-nemo-instruct-2407-awq>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format awq --quantization ${quantization}
+
+
+Model Spec 6 (ggufv2, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** ggufv2
+- **Model Size (in billions):** 12
+- **Quantizations:** Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_K_S, Q4_K_M, Q5_K_S, Q5_K_M, Q6_K, Q8_0, fp16
+- **Engines**: llama.cpp
+- **Model ID:** MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF
+- **Model Hubs**: `Hugging Face <https://huggingface.co/MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format ggufv2 --quantization ${quantization}
+
+
+Model Spec 7 (mlx, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** mlx
+- **Model Size (in billions):** 12
+- **Quantizations:** none
+- **Engines**: MLX
+- **Model ID:** mlx-community/Mistral-Nemo-Instruct-2407-bf16
+- **Model Hubs**: `Hugging Face <https://huggingface.co/mlx-community/Mistral-Nemo-Instruct-2407-bf16>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format mlx --quantization ${quantization}
+
+
+Model Spec 8 (mlx, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** mlx
+- **Model Size (in billions):** 12
+- **Quantizations:** 4-bit
+- **Engines**: MLX
+- **Model ID:** mlx-community/Mistral-Nemo-Instruct-2407-4bit
+- **Model Hubs**: `Hugging Face <https://huggingface.co/mlx-community/Mistral-Nemo-Instruct-2407-4bit>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format mlx --quantization ${quantization}
+
+
+Model Spec 9 (mlx, 12 Billion)
+++++++++++++++++++++++++++++++++++++++++
+
+- **Model Format:** mlx
+- **Model Size (in billions):** 12
+- **Quantizations:** 8-bit
+- **Engines**: MLX
+- **Model ID:** mlx-community/Mistral-Nemo-Instruct-2407-8bit
+- **Model Hubs**: `Hugging Face <https://huggingface.co/mlx-community/Mistral-Nemo-Instruct-2407-8bit>`__
+
+Execute the following command to launch the model. Remember to replace ``${quantization}`` with your
+chosen quantization method from the options listed above, and ``${engine}`` with one of the engines listed for this spec::
+
+   xinference launch --model-engine ${engine} --model-name mistral-nemo-instruct --size-in-billions 12 --model-format mlx --quantization ${quantization}
+
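Every spec above shares the same launch template. As a concrete instance, filling the placeholders for Model Spec 5 with that spec's listed options (engine ``vLLM``, quantization ``Int4``) gives::

    xinference launch --model-engine vllm --model-name mistral-nemo-instruct --size-in-billions 12 --model-format awq --quantization Int4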
diff --git a/doc/source/user_guide/backends.rst b/doc/source/user_guide/backends.rst
index a2202ba162..7d00b22e18 100644
--- a/doc/source/user_guide/backends.rst
+++ b/doc/source/user_guide/backends.rst
@@ -49,7 +49,7 @@ Currently, supported model includes:
 - ``llama-2``, ``llama-3``, ``llama-2-chat``, ``llama-3-instruct``
 - ``baichuan``, ``baichuan-chat``, ``baichuan-2-chat``
 - ``internlm-16k``, ``internlm-chat-7b``, ``internlm-chat-8k``, ``internlm-chat-20b``
-- ``mistral-v0.1``, ``mistral-instruct-v0.1``, ``mistral-instruct-v0.2``, ``mistral-instruct-v0.3``
+- ``mistral-v0.1``, ``mistral-instruct-v0.1``, ``mistral-instruct-v0.2``, ``mistral-instruct-v0.3``, ``mistral-nemo-instruct``
 - ``codestral-v0.1``
 - ``Yi``, ``Yi-1.5``, ``Yi-chat``, ``Yi-1.5-chat``, ``Yi-1.5-chat-16k``
 - ``code-llama``, ``code-llama-python``, ``code-llama-instruct``
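The two registry records that follow (the Hugging Face hub file first, then the ModelScope one) use the schema shared by all built-in families. User-defined models follow the same shape and can be registered at runtime. A minimal sketch, assuming a running server and the standard client; the record here is abbreviated and purely illustrative, since a real registration needs complete ``model_specs`` and ``prompt_style`` sections like the ones below::

    import json

    from xinference.client import Client

    # Hypothetical family record following the llm_family.json schema below.
    custom_family = {
        "version": 1,
        "context_length": 1024000,
        "model_name": "my-mistral-nemo-variant",  # hypothetical name
        "model_lang": ["en"],
        "model_ability": ["chat"],
        "model_specs": [],  # fill in as in the built-in records below
    }

    client = Client("http://127.0.0.1:9997")
    client.register_model(
        model_type="LLM",
        model=json.dumps(custom_family),
        persist=False,
    )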
diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index 5e456bdfb9..3b35fa333b 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -3936,6 +3936,130 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 1024000,
+    "model_name": "mistral-nemo-instruct",
+    "model_lang": [
+      "en",
+      "fr",
+      "de",
+      "es",
+      "it",
+      "pt",
+      "zh",
+      "ru",
+      "ja"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mistralai/Mistral-Nemo-Instruct-2407",
+        "model_revision": "05b1e4f3e189ec1b5189fb3c973d4cf3369c27af"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
+        "model_revision": "1d85adc9e0fff0b8e4479a037bd75fe1346333ca"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit",
+        "model_revision": "1d2dacf18a486c745219317d1507441406bc7e25"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/mistral-nemo-instruct-2407-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "fp16"
+        ],
+        "model_id": "MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
+        "model_file_name_template": "Mistral-Nemo-Instruct-2407.{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "mistral-nemo",
+      "system_prompt": "",
+      "roles": [
+        "[INST]",
+        "[/INST]"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "",
+      "stop_token_ids": [
+        2
+      ],
+      "stop": [
+        "</s>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 32768,
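The ``prompt_style`` block above is consumed by the ``mistral-nemo`` branch added to ``utils.py`` later in this diff. To make the resulting wire format concrete, here is a standalone re-run of that loop on a toy history (plain Python with the roles and separators copied from the JSON above, not the actual Xinference classes)::

    # Mirrors the "mistral-nemo" branch added to utils.py in this diff.
    roles = {"user": "[INST]", "assistant": "[/INST]"}
    seps = ["", ""]  # intra_message_sep, inter_message_sep
    system_prompt = ""  # empty by default, so nothing is injected below

    chat_history = [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi, how can I help?"},
        {"role": "user", "content": "Name the base model."},
        {"role": "assistant", "content": ""},  # empty turn to be generated
    ]

    ret = ""
    for i, message in enumerate(chat_history):
        role = roles[message["role"]]
        content = message["content"]
        if content:
            # The system prompt, when set, is prepended to the last user turn.
            if i == len(chat_history) - 2 and system_prompt:
                ret += role + " " + system_prompt + "\n\n" + content + seps[i % 2]
            else:
                ret += role + " " + content + seps[i % 2]
        else:
            ret += role

    print(ret)
    # [INST] Hello![/INST] Hi, how can I help?[INST] Name the base model.[/INST]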
"model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": 12, + "quantizations": [ + "Int4" + ], + "model_id": "LLM-Research/Mistral-Nemo-Instruct-2407-gptq-4bit", + "model_hub": "modelscope" + } + ], + "prompt_style": { + "style_name": "mistral-nemo", + "system_prompt": "", + "roles": [ + "[INST]", + "[/INST]" + ], + "intra_message_sep": "", + "inter_message_sep": "", + "stop_token_ids": [ + 2 + ], + "stop": [ + "" + ] + } + }, { "version": 1, "context_length": 2048, diff --git a/xinference/model/llm/utils.py b/xinference/model/llm/utils.py index 9ba61ea78b..aadcbf9471 100644 --- a/xinference/model/llm/utils.py +++ b/xinference/model/llm/utils.py @@ -483,6 +483,27 @@ def get_role(role_name: str): else: ret += role return ret + elif prompt_style.style_name == "mistral-nemo": + seps = [prompt_style.intra_message_sep, prompt_style.inter_message_sep] + ret = "" + for i, message in enumerate(chat_history): + role = get_role(message["role"]) + content = message["content"] + if content: + if i == len(chat_history) - 2 and prompt_style.system_prompt: + ret += ( + role + + " " + + prompt_style.system_prompt + + "\n\n" + + content + + seps[i % 2] + ) + else: + ret += role + " " + content + seps[i % 2] + else: + ret += role + return ret else: raise ValueError(f"Invalid prompt style: {prompt_style.style_name}") diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py index 77b0b55b28..bffdf22ecc 100644 --- a/xinference/model/llm/vllm/core.py +++ b/xinference/model/llm/vllm/core.py @@ -151,6 +151,9 @@ class VLLMGenerateConfig(TypedDict, total=False): VLLM_SUPPORTED_CHAT_MODELS.append("qwen2-moe-instruct") VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01") +if VLLM_INSTALLED and vllm.__version__ >= "0.5.3": + VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct") + class VLLMModel(LLM): def __init__(