diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py index d860ffee584..6478eae8c1b 100644 --- a/tests/post_training/conftest.py +++ b/tests/post_training/conftest.py @@ -20,6 +20,7 @@ def pytest_addoption(parser): parser.addoption("--output", action="store", default="./tmp/", help="Directory to store artifacts") parser.addoption("--no-eval", action="store_true", help="Skip validation step") parser.addoption("--batch_size", action="store", default=1, type=int, help="Batch size of calibration dataset") + parser.addoption("--dynamic_batch_shape", action="store_true", help="Export model with dynamic batch axis") parser.addoption("--subset-size", type=int, default=None, help="Set subset size") parser.addoption("--fp32", action="store_true", help="Test original model") parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend") diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py index 5349e9e4126..461da0c5a07 100644 --- a/tests/post_training/pipelines/base.py +++ b/tests/post_training/pipelines/base.py @@ -183,6 +183,7 @@ def __init__( reference_data: dict, no_eval: bool, run_benchmark_app: bool, + dynamic_batch_shape: bool, params: dict = None, batch_size: int = 1, ) -> None: @@ -195,6 +196,7 @@ def __init__( self.reference_data = reference_data self.params = params or {} self.batch_size = batch_size + self.dynamic_batch_shape = dynamic_batch_shape self.no_eval = no_eval self.run_benchmark_app = run_benchmark_app self.output_model_dir: Path = self.output_dir / self.reported_name / self.backend.value diff --git a/tests/post_training/pipelines/causal_language_model.py b/tests/post_training/pipelines/causal_language_model.py index 84a011dd137..8dc27c7a8fb 100644 --- a/tests/post_training/pipelines/causal_language_model.py +++ b/tests/post_training/pipelines/causal_language_model.py @@ -24,6 +24,8 @@ class CausalLMHF(PTQTestPipeline): """Pipeline for causal language models from Hugging Face repository""" def prepare_model(self) -> None: + if self.dynamic_batch_shape: + raise ValueError("The model does not support export with dynamic input shape") if self.backend in OV_BACKENDS + [BackendType.FP32]: self.model_hf = OVModelForCausalLM.from_pretrained(self.model_id, export=True, compile=False) self.model = self.model_hf.model diff --git a/tests/post_training/pipelines/image_classification_timm.py b/tests/post_training/pipelines/image_classification_timm.py index cf095260e17..c9a386d369d 100644 --- a/tests/post_training/pipelines/image_classification_timm.py +++ b/tests/post_training/pipelines/image_classification_timm.py @@ -36,16 +36,6 @@ # Disable using aten::scaled_dot_product_attention set_fused_attn(False, False) -BATCH_SIZE_NOT_A_DIVISOR_MESSAGE = ( - "The model validation will be done with batch_size=1 because the provided batch_size value " - "is not a divisor of the length of the validation dataset. The compressed model also " - "will be reshaped to a shape with batch_size=1." -) -BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE = ( - "To avoid model reshaping, please, provide the --batch_size option which " - "is a divisor of the length of the validation dataset." -) - class ImageClassificationTimm(PTQTestPipeline): """Pipeline for Image Classification model from timm repository""" @@ -57,13 +47,21 @@ def prepare_model(self) -> None: self.model_cfg = timm_model.default_cfg self.input_size = [self.batch_size] + list(timm_model.default_cfg["input_size"]) self.dummy_tensor = torch.rand(self.input_size) + if self.dynamic_batch_shape: + self.input_size[0] = -1 if self.backend in PT_BACKENDS: self.model = timm_model if self.backend == BackendType.ONNX: onnx_path = self.fp32_model_dir / "model_fp32.onnx" - torch.onnx.export(timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13) + additional_kwargs = {} + if self.dynamic_batch_shape: + additional_kwargs["input_names"] = ["image"] + additional_kwargs["dynamic_axes"] = {"image": {0: "batch"}} + torch.onnx.export( + timm_model, self.dummy_tensor, onnx_path, export_params=True, opset_version=13, **additional_kwargs + ) self.model = onnx.load(onnx_path) self.input_name = self.model.graph.input[0].name @@ -128,24 +126,20 @@ def prepare_calibration_dataset(self): def _validate(self): val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform) + val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) dataset_size = len(val_dataset) + if dataset_size % self.batch_size != 0 and not self.dynamic_batch_shape: + raise ValueError( + ( + "Because the batch_size is not a divisor of the length of the dataset, " + "the one of the data tensors has a shape incompatible with static model input. " + "Use --dynamic_batch_shape option to export such model with dynamic shape." + ) + ) + core = ov.Core() ov_model = core.read_model(self.path_compressed_ir) compiled_model = core.compile_model(ov_model) - if dataset_size % self.batch_size != 0: - print(BATCH_SIZE_NOT_A_DIVISOR_MESSAGE) - self.batch_size = 1 - try: - ov_model.reshape([self.batch_size, *self.input_size[1:]]) - except Exception as e: - print( - ( - f"During model reshaping the following error occurred: {os.linesep} {e} {os.linesep}" - f"{BATCH_SIZE_OPTION_RECOMMENDATION_MESSAGE}" - ) - ) - exit() - val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=2, shuffle=False) # Initialize result tensors for async inference support. predictions = np.zeros((dataset_size)) references = -1 * np.ones((dataset_size)) diff --git a/tests/post_training/pipelines/lm_weight_compression.py b/tests/post_training/pipelines/lm_weight_compression.py index 1d69967bce1..bb950172e7c 100644 --- a/tests/post_training/pipelines/lm_weight_compression.py +++ b/tests/post_training/pipelines/lm_weight_compression.py @@ -71,6 +71,8 @@ class LMWeightCompression(BaseTestPipeline): OV_MODEL_NAME = "openvino_model.xml" def prepare_model(self) -> None: + if self.dynamic_batch_shape: + raise ValueError("The model does not support export with dynamic input shape") is_stateful = self.params.get("is_stateful", False) if is_stateful: self.fp32_model_dir = self.fp32_model_dir.parent / (self.fp32_model_dir.name + "_sf") @@ -129,6 +131,9 @@ def transform_fn(data): return transform_fn def prepare_calibration_dataset(self): + if self.batch_size > 1: + print("Batch size > 1 is not supported for causal language models. Batch size = 1 is set.") + self.batch_size = 1 dataset = load_dataset("wikitext", "wikitext-2-v1", split="train", revision="b08601e") dataset = dataset.filter(lambda example: len(example["text"]) > 80) self.calibration_dataset = nncf.Dataset(dataset, self.get_transform_calibration_fn()) diff --git a/tests/post_training/pipelines/masked_language_modeling.py b/tests/post_training/pipelines/masked_language_modeling.py index 76cf3206f12..c268dee79ab 100644 --- a/tests/post_training/pipelines/masked_language_modeling.py +++ b/tests/post_training/pipelines/masked_language_modeling.py @@ -86,6 +86,8 @@ def transform_func(data): return transform_func def prepare_calibration_dataset(self): + if self.dynamic_batch_shape: + raise ValueError("The model does not support export with dynamic input shape") if self.batch_size > 1: print("Batch size > 1 is not supported for masked language models. Batch size = 1 is set.") self.batch_size = 1 diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py index e2472f76af1..6840930fc48 100644 --- a/tests/post_training/test_quantize_conformance.py +++ b/tests/post_training/test_quantize_conformance.py @@ -50,6 +50,11 @@ def fixture_batch_size(pytestconfig): return pytestconfig.getoption("batch_size") +@pytest.fixture(scope="session", name="dynamic_batch_shape") +def fixture_dynamic_batch_shape(pytestconfig): + return pytestconfig.getoption("dynamic_batch_shape") + + @pytest.fixture(scope="session", name="subset_size") def fixture_subset_size(pytestconfig): return pytestconfig.getoption("subset_size") @@ -202,6 +207,7 @@ def test_ptq_quantization( ptq_result_data: Dict[str, RunInfo], no_eval: bool, batch_size: int, + dynamic_batch_shape: bool, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -231,6 +237,7 @@ def test_ptq_quantization( "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, "batch_size": batch_size, + "dynamic_batch_shape": dynamic_batch_shape, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs) @@ -269,6 +276,7 @@ def test_weight_compression( wc_result_data: Dict[str, RunInfo], no_eval: bool, batch_size: int, + dynamic_batch_shape: bool, run_fp32_backend: bool, run_torch_cuda_backend: bool, subset_size: Optional[int], @@ -294,6 +302,7 @@ def test_weight_compression( "no_eval": no_eval, "run_benchmark_app": run_benchmark_app, "batch_size": batch_size, + "dynamic_batch_shape": dynamic_batch_shape, } ) pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs)