Merge pull request #1118 from openvinotoolkit/ashwin/fix_non_determin…

…istic [Anomaly Task] Fix non deterministic + sample.py
openvinotoolkit · May 31, 2022 · 56e6624 · 56e6624
2 parents 2e18117 + 9ef77d0
commit 56e6624
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 10 deletions.
diff --git a/external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py b/external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py
@@ -184,7 +184,7 @@ def create_task_annotations(task: str, data_path: str, annotation_path: str) ->
     Raises:
         ValueError: When task is not classification, detection or segmentation.
     """
-    annotation_path = os.path.join(data_path, task)
+    annotation_path = os.path.join(annotation_path, task)
     os.makedirs(annotation_path, exist_ok=True)
 
     for split in ["train", "val", "test"]:

diff --git a/external/anomaly/ote_anomalib/train_task.py b/external/anomaly/ote_anomalib/train_task.py
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions
 # and limitations under the License.
 
+from typing import Optional
+
 from anomalib.utils.callbacks import MinMaxNormalizationCallback
 from ote_anomalib import AnomalyInferenceTask
 from ote_anomalib.callbacks import ProgressCallback
@@ -23,7 +25,7 @@
 from ote_sdk.entities.model import ModelEntity
 from ote_sdk.entities.train_parameters import TrainParameters
 from ote_sdk.usecases.tasks.interfaces.training_interface import ITrainingTask
-from pytorch_lightning import Trainer
+from pytorch_lightning import Trainer, seed_everything
 
 logger = get_logger(__name__)
 
@@ -36,17 +38,26 @@ def train(
         dataset: DatasetEntity,
         output_model: ModelEntity,
         train_parameters: TrainParameters,
+        seed: Optional[int] = None,
     ) -> None:
         """Train the anomaly classification model.
 
         Args:
             dataset (DatasetEntity): Input dataset.
             output_model (ModelEntity): Output model to save the model weights.
             train_parameters (TrainParameters): Training parameters
+            seed: (Optional[int]): Setting seed to a value other than 0 also marks PytorchLightning trainer's
+                deterministic flag to True.
         """
         logger.info("Training the model.")
 
         config = self.get_config()
+
+        if seed:
+            logger.info(f"Setting seed to {seed}")
+            seed_everything(seed, workers=True)
+            config.trainer.deterministic = True
+
         logger.info("Training Configs '%s'", config)
 
         datamodule = OTEAnomalyDataModule(config=config, dataset=dataset, task_type=self.task_type)

diff --git a/external/anomaly/tests/test_ote_training.py b/external/anomaly/tests/test_ote_training.py
@@ -238,7 +238,8 @@ def _run_ote_training(self, data_collector):
         self.copy_hyperparams = deepcopy(self.task.task_environment.get_hyper_parameters())
 
         try:
-            self.task.train(self.dataset, self.output_model, TrainParameters)
+            # fix seed so that result is repeatable
+            self.task.train(self.dataset, self.output_model, TrainParameters, seed=42)
         except Exception as ex:
             raise RuntimeError("Training failed") from ex
 

diff --git a/external/anomaly/tools/README.md b/external/anomaly/tools/README.md
@@ -0,0 +1,23 @@
+OpenVINO Training Extensions interacts with the anomaly detection library ([Anomalib](https://github.com/openvinotoolkit/anomalib)) by providing interfaces in the `external/anomaly` directory of this repository. The `sample.py` file contained in this folder serves as an end-to-end example of how these interfaces are used. To begin using this script, first ensure that the `ote_cli`, `ote_sdk` and `external/anomaly` dependencies are installed.
+
+To get started, we provide a handy script in `ote_anomalib/data/create_mvtec_ad_json_annotations.py` to help generate annotation JSON files for the MVTec dataset. Assuming that you have placed the MVTec dataset in a directory in your home folder (`~/datasets/MVTec`), you can run the following command to generate the annotations.
+
+```bash
+python create_mvtec_ad_json_annotations.py --data_path ~/datasets/MVTec --annotation_path ~/training_extensions/data/MVtec/
+```
+
+This will generate three folders in `~/training_extensions/data/MVtec/` for the classification, segmentation and detection tasks.
+
+Then, to run `sample.py`, you can use the following command.
+
+```bash
+python tools/sample.py \
+    --dataset_path ~/datasets/MVTec \
+    --category bottle \
+    --train-ann-files ../../data/MVtec/bottle/segmentation/train.json \
+    --val-ann-files ../../data/MVtec/bottle/segmentation/val.json \
+    --test-ann-files ../../data/MVtec/bottle/segmentation/test.json \
+    --model_template_path ./configs/anomaly_segmentation/padim/template.yaml
+```
+
+Optionally, you can also optimize the model with `nncf` or `pot` by using the `--optimization` flag.
diff --git a/external/anomaly/tools/sample.py b/external/anomaly/tools/sample.py
@@ -22,7 +22,7 @@
 import os
 import shutil
 from argparse import Namespace
-from typing import Any, Dict, Type, Union
+from typing import Any, Dict, Optional, Type, Union
 
 from ote_anomalib import AnomalyNNCFTask, OpenVINOAnomalyTask
 from ote_anomalib.data.dataset import (
@@ -61,13 +61,18 @@ def __init__(
         val_subset: Dict[str, str],
         test_subset: Dict[str, str],
         model_template_path: str,
+        seed: Optional[int] = None,
     ) -> None:
         """Initialize OteAnomalyTask.
 
         Args:
             dataset_path (str): Path to the MVTec dataset.
-            seed (int): Seed to split the dataset into train/val/test splits.
+            train_subset (Dict[str, str]): Dictionary containing path to train annotation file and path to dataset.
+            val_subset (Dict[str, str]): Dictionary containing path to validation annotation file and path to dataset.
+            test_subset (Dict[str, str]): Dictionary containing path to test annotation file and path to dataset.
             model_template_path (str): Path to model template.
+            seed (Optional[int]): Setting seed to a value other than 0 also marks PytorchLightning trainer's
+                deterministic flag to True.
 
         Example:
             >>> import os
@@ -78,9 +83,12 @@ def __init__(
 
             >>> model_template_path = "./configs/anomaly_classification/padim/template.yaml"
             >>> dataset_path = "./datasets/MVTec"
-            >>> seed = 0
             >>> task = OteAnomalyTask(
-            ...     dataset_path=dataset_path, seed=seed, model_template_path=model_template_path
+            ...     dataset_path=dataset_path,
+            ...     train_subset={"ann_file": train.json, "data_root": dataset_path},
+            ...     val_subset={"ann_file": val.json, "data_root": dataset_path},
+            ...     test_subset={"ann_file": test.json, "data_root": dataset_path},
+            ...     model_template_path=model_template_path
             ... )
 
             >>> task.train()
@@ -110,6 +118,7 @@ def __init__(
         self.openvino_task: OpenVINOAnomalyTask
         self.nncf_task: AnomalyNNCFTask
         self.results = {"category": dataset_path}
+        self.seed = seed
 
     def get_dataclass(
         self,
@@ -176,9 +185,7 @@ def train(self) -> ModelEntity:
             configuration=self.task_environment.get_model_configuration(),
         )
         self.torch_task.train(
-            dataset=self.dataset,
-            output_model=output_model,
-            train_parameters=TrainParameters(),
+            dataset=self.dataset, output_model=output_model, train_parameters=TrainParameters(), seed=self.seed
         )
 
         logger.info("Inferring the base torch model on the validation set.")
@@ -364,6 +371,7 @@ def main() -> None:
         val_subset=val_subset,
         test_subset=test_subset,
         model_template_path=args.model_template_path,
+        seed=args.seed,
     )
 
     task.train()