From b7c30a240f2cf389a495fd44ddc3df806d00bd0f Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Mon, 23 May 2022 15:38:59 +0200 Subject: [PATCH 1/3] Add seed + sample.py guide --- .../data/create_mvtec_ad_json_annotations.py | 2 +- external/anomaly/ote_anomalib/train_task.py | 12 +++++++++- external/anomaly/tests/test_ote_training.py | 3 ++- external/anomaly/tools/README.md | 17 ++++++++++++++ external/anomaly/tools/sample.py | 22 +++++++++++++------ 5 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 external/anomaly/tools/README.md diff --git a/external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py b/external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py index 5c9c98578e0..652764ae603 100644 --- a/external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py +++ b/external/anomaly/ote_anomalib/data/create_mvtec_ad_json_annotations.py @@ -184,7 +184,7 @@ def create_task_annotations(task: str, data_path: str, annotation_path: str) -> Raises: ValueError: When task is not classification, detection or segmentation. """ - annotation_path = os.path.join(data_path, task) + annotation_path = os.path.join(annotation_path, task) os.makedirs(annotation_path, exist_ok=True) for split in ["train", "val", "test"]: diff --git a/external/anomaly/ote_anomalib/train_task.py b/external/anomaly/ote_anomalib/train_task.py index d2de58ab9dd..59cabc427b7 100644 --- a/external/anomaly/ote_anomalib/train_task.py +++ b/external/anomaly/ote_anomalib/train_task.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions # and limitations under the License. 
+from typing import Optional from anomalib.utils.callbacks import MinMaxNormalizationCallback from ote_anomalib import AnomalyInferenceTask from ote_anomalib.callbacks import ProgressCallback @@ -23,7 +24,7 @@ from ote_sdk.entities.model import ModelEntity from ote_sdk.entities.train_parameters import TrainParameters from ote_sdk.usecases.tasks.interfaces.training_interface import ITrainingTask -from pytorch_lightning import Trainer +from pytorch_lightning import Trainer, seed_everything logger = get_logger(__name__) @@ -36,6 +37,7 @@ def train( dataset: DatasetEntity, output_model: ModelEntity, train_parameters: TrainParameters, + seed: Optional[int] = 0, ) -> None: """Train the anomaly classification model. @@ -43,10 +45,18 @@ def train( dataset (DatasetEntity): Input dataset. output_model (ModelEntity): Output model to save the model weights. train_parameters (TrainParameters): Training parameters + seed: (Optional[int]): Setting seed to a value other than 0 also marks PytorchLightning trainer's + deterministic flag to True. 
""" logger.info("Training the model.") config = self.get_config() + + if seed is not None and seed > 0: + logger.info(f"Setting seed to {seed}") + seed_everything(seed, workers=True) + config.trainer.deterministic = True + logger.info("Training Configs '%s'", config) datamodule = OTEAnomalyDataModule(config=config, dataset=dataset, task_type=self.task_type) diff --git a/external/anomaly/tests/test_ote_training.py b/external/anomaly/tests/test_ote_training.py index 7db6b88ed1e..1a7f3f54890 100644 --- a/external/anomaly/tests/test_ote_training.py +++ b/external/anomaly/tests/test_ote_training.py @@ -238,7 +238,8 @@ def _run_ote_training(self, data_collector): self.copy_hyperparams = deepcopy(self.task.task_environment.get_hyper_parameters()) try: - self.task.train(self.dataset, self.output_model, TrainParameters) + # fix seed so that result is repeatable + self.task.train(self.dataset, self.output_model, TrainParameters, seed=42) except Exception as ex: raise RuntimeError("Training failed") from ex diff --git a/external/anomaly/tools/README.md b/external/anomaly/tools/README.md new file mode 100644 index 00000000000..df6791776ac --- /dev/null +++ b/external/anomaly/tools/README.md @@ -0,0 +1,17 @@ +OpenVINO Training Extension interacts with the anomaly detection library ([Anomalib](https://github.com/openvinotoolkit/anomalib)) by providing interfaces in the `external/anomaly` folder of this repository. The `sample.py` file contained in this folder serves as an end-to-end example of how these interfaces are used. To begin using this script, first ensure that `ote_cli`, `ote_sdk` and `external/anomaly` dependencies are installed. + +To get started, we provide a handy script in `ote_anomalib/data/create_mvtec_ad_json_annotations.py` to help generate annotation json files for the MVTec dataset. Assuming that you have placed the MVTec dataset in a directory in your home folder (`~/datasets/MVTec`), you can run the following command to generate the annotations. 
+ +```bash +python create_mvtec_ad_json_annotations.py --data_path ~/datasets/MVTec --annotation_path ~/training_extensions/data/MVtec/ +``` + +This will generate three folders in `~/training_extensions/data/MVtec/` for classification, segmentation and detection tasks. + +Then, to run sample.py you can use the following command. + +```bash +python tools/sample.py --dataset_path ~/datasets/MVTec --category bottle --train-ann-files ../../data/MVtec/bottle/segmentation/train.json --val-ann-files ../../data/MVtec/bottle/segmentation/val.json --test-ann-files ../../data/MVtec/bottle/segmentation/test.json --model_template_path ./configs/anomaly_segmentation/padim/template.yaml +``` + +Optionally, you can also optimize to `nncf` or `pot` by using the `--optimization` flag \ No newline at end of file diff --git a/external/anomaly/tools/sample.py b/external/anomaly/tools/sample.py index 72b7ad76118..06114cfa119 100644 --- a/external/anomaly/tools/sample.py +++ b/external/anomaly/tools/sample.py @@ -22,7 +22,7 @@ import os import shutil from argparse import Namespace -from typing import Any, Dict, Type, Union +from typing import Any, Dict, Optional, Type, Union from ote_anomalib import AnomalyNNCFTask, OpenVINOAnomalyTask from ote_anomalib.data.dataset import ( @@ -61,13 +61,18 @@ def __init__( val_subset: Dict[str, str], test_subset: Dict[str, str], model_template_path: str, + seed: Optional[int] = 0, ) -> None: """Initialize OteAnomalyTask. Args: dataset_path (str): Path to the MVTec dataset. - seed (int): Seed to split the dataset into train/val/test splits. + train_subset (Dict[str, str]): Dictionary containing path to train annotation file and path to dataset. + val_subset (Dict[str, str]): Dictionary containing path to validation annotation file and path to dataset. + test_subset (Dict[str, str]): Dictionary containing path to test annotation file and path to dataset. model_template_path (str): Path to model template. 
+ seed (Optional[int]): Setting seed to a value other than 0 also marks PytorchLightning trainer's + deterministic flag to True. Example: >>> import os @@ -78,9 +83,12 @@ def __init__( >>> model_template_path = "./configs/anomaly_classification/padim/template.yaml" >>> dataset_path = "./datasets/MVTec" - >>> seed = 0 >>> task = OteAnomalyTask( - ... dataset_path=dataset_path, seed=seed, model_template_path=model_template_path + ... dataset_path=dataset_path, + ... train_subset={"ann_file": train.json, "data_root": dataset_path}, + ... val_subset={"ann_file": val.json, "data_root": dataset_path}, + ... test_subset={"ann_file": test.json, "data_root": dataset_path}, + ... model_template_path=model_template_path ... ) >>> task.train() @@ -110,6 +118,7 @@ def __init__( self.openvino_task: OpenVINOAnomalyTask self.nncf_task: AnomalyNNCFTask self.results = {"category": dataset_path} + self.seed = seed def get_dataclass( self, @@ -176,9 +185,7 @@ def train(self) -> ModelEntity: configuration=self.task_environment.get_model_configuration(), ) self.torch_task.train( - dataset=self.dataset, - output_model=output_model, - train_parameters=TrainParameters(), + dataset=self.dataset, output_model=output_model, train_parameters=TrainParameters(), seed=self.seed ) logger.info("Inferring the base torch model on the validation set.") @@ -364,6 +371,7 @@ def main() -> None: val_subset=val_subset, test_subset=test_subset, model_template_path=args.model_template_path, + seed=args.seed, ) task.train() From fdd5307c84929deae03b896e278664c33a870d2b Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Mon, 23 May 2022 15:39:37 +0200 Subject: [PATCH 2/3] spacing --- external/anomaly/ote_anomalib/train_task.py | 1 + 1 file changed, 1 insertion(+) diff --git a/external/anomaly/ote_anomalib/train_task.py b/external/anomaly/ote_anomalib/train_task.py index 59cabc427b7..a4a30a65926 100644 --- a/external/anomaly/ote_anomalib/train_task.py +++ b/external/anomaly/ote_anomalib/train_task.py @@ -15,6 
+15,7 @@ # and limitations under the License. from typing import Optional + from anomalib.utils.callbacks import MinMaxNormalizationCallback from ote_anomalib import AnomalyInferenceTask from ote_anomalib.callbacks import ProgressCallback From 9ef77d07ba2dd2bad3da593dbca58c04a406c44c Mon Sep 17 00:00:00 2001 From: Ashwin Vaidya Date: Mon, 23 May 2022 16:01:37 +0200 Subject: [PATCH 3/3] Set optional params to None --- external/anomaly/ote_anomalib/train_task.py | 4 ++-- external/anomaly/tools/README.md | 8 +++++++- external/anomaly/tools/sample.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/external/anomaly/ote_anomalib/train_task.py b/external/anomaly/ote_anomalib/train_task.py index a4a30a65926..7dfa8ee61cd 100644 --- a/external/anomaly/ote_anomalib/train_task.py +++ b/external/anomaly/ote_anomalib/train_task.py @@ -38,7 +38,7 @@ def train( dataset: DatasetEntity, output_model: ModelEntity, train_parameters: TrainParameters, - seed: Optional[int] = 0, + seed: Optional[int] = None, ) -> None: """Train the anomaly classification model. @@ -53,7 +53,7 @@ def train( config = self.get_config() - if seed is not None and seed > 0: + if seed: logger.info(f"Setting seed to {seed}") seed_everything(seed, workers=True) config.trainer.deterministic = True diff --git a/external/anomaly/tools/README.md b/external/anomaly/tools/README.md index df6791776ac..5ec7bc57eb0 100644 --- a/external/anomaly/tools/README.md +++ b/external/anomaly/tools/README.md @@ -11,7 +11,13 @@ This will generate three folders in `~/training_extensions/data/MVtec/` for clas Then, to run sample.py you can use the following command. 
```bash -python tools/sample.py --dataset_path ~/datasets/MVTec --category bottle --train-ann-files ../../data/MVtec/bottle/segmentation/train.json --val-ann-files ../../data/MVtec/bottle/segmentation/val.json --test-ann-files ../../data/MVtec/bottle/segmentation/test.json --model_template_path ./configs/anomaly_segmentation/padim/template.yaml +python tools/sample.py \ + --dataset_path ~/datasets/MVTec \ + --category bottle \ + --train-ann-files ../../data/MVtec/bottle/segmentation/train.json \ + --val-ann-files ../../data/MVtec/bottle/segmentation/val.json \ + --test-ann-files ../../data/MVtec/bottle/segmentation/test.json \ + --model_template_path ./configs/anomaly_segmentation/padim/template.yaml ``` Optionally, you can also optimize to `nncf` or `pot` by using the `--optimization` flag \ No newline at end of file diff --git a/external/anomaly/tools/sample.py b/external/anomaly/tools/sample.py index 06114cfa119..13bdb2f5c51 100644 --- a/external/anomaly/tools/sample.py +++ b/external/anomaly/tools/sample.py @@ -61,7 +61,7 @@ def __init__( val_subset: Dict[str, str], test_subset: Dict[str, str], model_template_path: str, - seed: Optional[int] = 0, + seed: Optional[int] = None, ) -> None: """Initialize OteAnomalyTask.