From a0f5fe9e02e2e4db50814d571f6c2f41b0eb7e9a Mon Sep 17 00:00:00 2001
From: Aleksei Kashapov
Date: Wed, 6 Mar 2024 17:02:29 +0100
Subject: [PATCH] Add --dynamic_batch_shape option to conformance tests
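
Export models with a dynamic batch axis instead of reshaping the
compressed model to batch_size=1 when the provided batch size is not
a divisor of the validation dataset length. Pipelines that cannot be
exported with a dynamic input shape (the causal LM, LM weight
compression, and masked LM pipelines) raise a ValueError when the
option is passed.

Example invocation (the exact set of additional options may vary):

    pytest tests/post_training/test_quantize_conformance.py \
        --batch_size 4 --dynamic_batch_shape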
---
 tests/post_training/conftest.py               |  1 +
 tests/post_training/pipelines/base.py         |  2 +
 .../pipelines/causal_language_model.py        |  2 +
 .../pipelines/image_classification_timm.py    | 44 ++++++++-----------
 .../pipelines/lm_weight_compression.py        |  5 +++
 .../pipelines/masked_language_modeling.py     |  2 +
 .../test_quantize_conformance.py              |  9 ++++
 7 files changed, 40 insertions(+), 25 deletions(-)
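
Note for reviewers: below is a minimal, self-contained sketch of the
dynamic-batch ONNX export this patch enables for the timm pipeline (a
stand-in torch.nn.Conv2d replaces the real timm model, and the output
file name is illustrative):

    import torch

    # Stand-in for the timm model exported by ImageClassificationTimm.
    model = torch.nn.Conv2d(3, 8, kernel_size=3)
    # Tracing still needs a concrete batch dimension in the dummy input.
    dummy = torch.rand(1, 3, 224, 224)
    torch.onnx.export(
        model,
        dummy,
        "model_fp32.onnx",
        export_params=True,
        opset_version=13,
        input_names=["image"],
        # Mark axis 0 of the "image" input as dynamic ("batch").
        dynamic_axes={"image": {0: "batch"}},
    )
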
diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py
index d860ffee584..6478eae8c1b 100644
--- a/tests/post_training/conftest.py
+++ b/tests/post_training/conftest.py
@@ -20,6 +20,7 @@ def pytest_addoption(parser):
     parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts")
     parser.addoption("--no-eval", action="store_true", help="Skip validation step")
     parser.addoption("--batch_size", action="store", default=1, type=int, help="Batch size of calibration dataset")
+    parser.addoption("--dynamic_batch_shape", action="store_true", help="Export model with dynamic batch axis")
     parser.addoption("--subset-size", type=int, default=None, help="Set subset size")
     parser.addoption("--fp32", action="store_true", help="Test original model")
     parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend")
diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py
index 5349e9e4126..461da0c5a07 100644
--- a/tests/post_training/pipelines/base.py
+++ b/tests/post_training/pipelines/base.py
@@ -183,6 +183,7 @@ def __init__(
         reference_data: dict,
         no_eval: bool,
         run_benchmark_app: bool,
+        dynamic_batch_shape: bool,
         params: dict = None,
         batch_size: int = 1,
     ) -> None:
@@ -195,6 +196,7 @@ def __init__(
         self.reference_data = reference_data
         self.params = params or {}
         self.batch_size = batch_size
+        self.dynamic_batch_shape = dynamic_batch_shape
         self.no_eval = no_eval
         self.run_benchmark_app = run_benchmark_app
         self.output_model_dir: Path = self.output_dir / self.reported_name / self.backend.value
diff --git a/tests/post_training/pipelines/causal_language_model.py b/tests/post_training/pipelines/causal_language_model.py
index 84a011dd137..8dc27c7a8fb 100644
--- a/tests/post_training/pipelines/causal_language_model.py
+++ b/tests/post_training/pipelines/causal_language_model.py
@@ -24,6 +24,8 @@ class CausalLMHF(PTQTestPipeline):
     """Pipeline for causal language models from Hugging Face repository"""
 
     def prepare_model(self) -> None:
+        if self.dynamic_batch_shape:
+            raise ValueError("The model does not support export with a dynamic input shape")
         if self.backend in OV_BACKENDS + [BackendType.FP32]:
             self.model_hf = OVModelForCausalLM.from_pretrained(self.model_id, export=True, compile=False)
             self.model = self.model_hf.model
diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py
index cf095260e17..c9a386d369d 100644
--- a/tests/post_training/pipelines/image_classification_timm.py
+++ b/tests/post_training/pipelines/image_classification_timm.py
@@ -36,16 +36,6 @@
 # Disable using aten::scaled_dot_product_attention
 set_fused_attn(False, False)
 
-BATCH_SIZE_NOT_A_DIVISOR_MESSAGE = (
-    "The model validation will be done with batch_size=1 because the provided batch_size value "
-    "is not a divisor of the length of the validation dataset. The compressed model also "
-    "will be reshaped to a shape with batch_size=1."
-)
-BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE = (
-    "To avoid model reshaping, please, provide the --batch_size option which "
-    "is a divisor of the length of the validation dataset."
-)
-
 
 class ImageClassificationTimm(PTQTestPipeline):
     """Pipeline for Image Classification model from timm repository"""
@@ -57,13 +47,21 @@ def prepare_model(self) -> None:
         self.model_cfg = timm_model.default_cfg
         self.input_size = [self.batch_size] + list(timm_model.default_cfg["input_size"])
         self.dummy_tensor = torch.rand(self.input_size)
+        if self.dynamic_batch_shape:
+            self.input_size[0] = -1
 
         if self.backend in PT_BACKENDS:
             self.model = timm_model
 
         if self.backend == BackendType.ONNX:
             onnx_path = self.fp32_model_dir / "model_fp32.onnx"
-            torch.onnx.export(timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13)
+            additional_kwargs = {}
+            if self.dynamic_batch_shape:
+                additional_kwargs["input_names"] = ["image"]
+                additional_kwargs["dynamic_axes"] = {"image": {0: "batch"}}
+            torch.onnx.export(
+                timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13, **additional_kwargs
+            )
             self.model = onnx.load(onnx_path)
             self.input_name = self.model.graph.input[0].name
 
@@ -128,24 +126,20 @@ def prepare_calibration_dataset(self):
 
     def _validate(self):
         val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
+        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False)
         dataset_size = len(val_dataset)
+        if dataset_size % self.batch_size != 0 and not self.dynamic_batch_shape:
+            raise ValueError(
+                (
+                    "Because the batch_size is not a divisor of the length of the dataset, "
+                    "one of the data tensors has a shape incompatible with the static model input. "
+                    "Use the --dynamic_batch_shape option to export the model with a dynamic shape."
+                )
+            )
+
         core = ov.Core()
         ov_model = core.read_model(self.path_compressed_ir)
         compiled_model = core.compile_model(ov_model)
-        if dataset_size % self.batch_size != 0:
-            print(BATCH_SIZE_NOT_A_DIVISOR_MESSAGE)
-            self.batch_size = 1
-        try:
-            ov_model.reshape([self.batch_size, *self.input_size[1:]])
-        except Exception as e:
-            print(
-                (
-                    f"During model reshaping the following error occurred: {os.linesep} {e} {os.linesep}"
-                    f"{BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE}"
-                )
-            )
-            exit()
-        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False)
         # Initialize result tensors for async inference support.
         predictions = np.zeros((dataset_size))
         references = -1 * np.ones((dataset_size))
diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py
index 1d69967bce1..bb950172e7c 100644
--- a/tests/post_training/pipelines/lm_weight_compression.py
+++ b/tests/post_training/pipelines/lm_weight_compression.py
@@ -71,6 +71,8 @@ class LMWeightCompression(BaseTestPipeline):
     OV_MODEL_NAME = "openvino_model.xml"
 
     def prepare_model(self) -> None:
+        if self.dynamic_batch_shape:
+            raise ValueError("The model does not support export with a dynamic input shape")
         is_stateful = self.params.get("is_stateful", False)
         if is_stateful:
             self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf")
@@ -129,6 +131,9 @@ def transform_fn(data):
         return transform_fn
 
     def prepare_calibration_dataset(self):
+        if self.batch_size > 1:
+            print("Batch size > 1 is not supported for causal language models. Batch size = 1 is set.")
+            self.batch_size = 1
         dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e")
         dataset = dataset.filter(lambda example: len(example["text"]) > 80)
         self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn())
diff --git a/tests/post_training/pipelines/masked_language_modeling.py b/tests/post_training/pipelines/masked_language_modeling.py
index 76cf3206f12..c268dee79ab 100644
--- a/tests/post_training/pipelines/masked_language_modeling.py
+++ b/tests/post_training/pipelines/masked_language_modeling.py
@@ -86,6 +86,8 @@ def transform_func(data):
         return transform_func
 
     def prepare_calibration_dataset(self):
+        if self.dynamic_batch_shape:
+            raise ValueError("The model does not support export with a dynamic input shape")
         if self.batch_size > 1:
             print("Batch size > 1 is not supported for masked language models. Batch size = 1 is set.")
             self.batch_size = 1
diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py
index e2472f76af1..6840930fc48 100644
--- a/tests/post_training/test_quantize_conformance.py
+++ b/tests/post_training/test_quantize_conformance.py
@@ -50,6 +50,11 @@ def fixture_batch_size(pytestconfig):
     return pytestconfig.getoption("batch_size")
 
 
+@pytest.fixture(scope="session", name="dynamic_batch_shape")
+def fixture_dynamic_batch_shape(pytestconfig):
+    return pytestconfig.getoption("dynamic_batch_shape")
+
+
 @pytest.fixture(scope="session", name="subset_size")
 def fixture_subset_size(pytestconfig):
     return pytestconfig.getoption("subset_size")
@@ -202,6 +207,7 @@ def test_ptq_quantization(
     ptq_result_data: Dict[str, RunInfo],
     no_eval: bool,
     batch_size: int,
+    dynamic_batch_shape: bool,
     run_fp32_backend: bool,
     run_torch_cuda_backend: bool,
     subset_size: Optional[int],
@@ -231,6 +237,7 @@ def test_ptq_quantization(
             "no_eval": no_eval,
             "run_benchmark_app": run_benchmark_app,
             "batch_size": batch_size,
+            "dynamic_batch_shape": dynamic_batch_shape,
         }
     )
     pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs)
@@ -269,6 +276,7 @@ def test_weight_compression(
     wc_result_data: Dict[str, RunInfo],
     no_eval: bool,
     batch_size: int,
+    dynamic_batch_shape: bool,
     run_fp32_backend: bool,
     run_torch_cuda_backend: bool,
     subset_size: Optional[int],
@@ -294,6 +302,7 @@ def test_weight_compression(
             "no_eval": no_eval,
             "run_benchmark_app": run_benchmark_app,
             "batch_size": batch_size,
+            "dynamic_batch_shape": dynamic_batch_shape,
         }
     )
     pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs)