Add quantization config option (#433)

Co-authored-by: ZHENG, Zhen <[email protected]>
Co-authored-by: Logan Adams <[email protected]>
microsoft · Mar 8, 2024 · 429bc5c · 429bc5c
1 parent 6062c89
commit 429bc5c
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 1 deletion.
diff --git a/mii/api.py b/mii/api.py
@@ -50,6 +50,7 @@ def _parse_kwargs_to_model_config(
 
     # Create the ModelConfig object and return it with remaining kwargs
     model_config = ModelConfig(**model_config)
+
     return model_config, remaining_kwargs
 
 

diff --git a/mii/config.py b/mii/config.py
@@ -131,6 +131,12 @@ class ModelConfig(MIIConfigModel):
     `inference_engine_config`.
     """
 
+    quantization_mode: Optional[str] = None
+    """
+    The quantization mode in string format. The supported modes are as follows:
+        - 'wf6af16', weight-only quantization with FP6 weight and FP16 activation.
+    """
+
     inference_engine_config: RaggedInferenceEngineConfig = {}
     """
     DeepSpeed inference engine config. This is automatically generated, but you
@@ -210,6 +216,13 @@ def propagate_tp_size(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         values.get("inference_engine_config").tensor_parallel.tp_size = tensor_parallel
         return values
 
+    @root_validator
+    def propagate_quantization_mode(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        quantization_mode = values.get("quantization_mode")
+        values.get(
+            "inference_engine_config").quantization.quantization_mode = quantization_mode
+        return values
+
     @root_validator
     def check_replica_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         num_replica_config = len(values.get("replica_configs"))

diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -1,5 +1,5 @@
 asyncio
-deepspeed>=0.13.0
+deepspeed>=0.14.0
 deepspeed-kernels
 Flask-RESTful
 grpcio
Original file line number	Diff line number	Diff line change
Expand Up		@@ -50,6 +50,7 @@ def _parse_kwargs_to_model_config(

		# Create the ModelConfig object and return it with remaining kwargs
		model_config = ModelConfig(**model_config)

		return model_config, remaining_kwargs


Expand Down