Check for multi-machine in cityscapes evaluator

Summary: 1. Compare `local_size` vs. world size so the check works on CPUs as well. This fixes facebookresearch#3810. 2. Move the comparison into the evaluator to reduce code duplication. 3. Run linter. Pull Request resolved: facebookresearch#3848 Reviewed By: tglik Differential Revision: D33392661 Pulled By: zhanghang1989 fbshipit-source-id: 2d982135a21e572f9a2496d343af6390aa14990c
yusiyoh · Jan 4, 2022 · bb96d0b · bb96d0b
1 parent f7bc78e
commit bb96d0b
Show file tree

Hide file tree

Showing 8 changed files with 33 additions and 50 deletions.
diff --git a/detectron2/evaluation/cityscapes_evaluation.py b/detectron2/evaluation/cityscapes_evaluation.py
@@ -36,6 +36,9 @@ def reset(self):
         self._temp_dir = self._working_dir.name
         # All workers will write to the same results directory
         # TODO this does not work in distributed training
+        assert (
+            comm.get_local_size() == comm.get_world_size()
+        ), "CityscapesEvaluator currently do not work with multiple machines."
         self._temp_dir = comm.all_gather(self._temp_dir)[0]
         if self._temp_dir != self._working_dir.name:
             self._working_dir.cleanup()

diff --git a/detectron2/tracking/base_tracker.py b/detectron2/tracking/base_tracker.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python3
 # Copyright 2004-present Facebook. All Rights Reserved.
-from ..structures import Instances
-from detectron2.utils.registry import Registry
-from ..config.config import CfgNode as CfgNode_
+from detectron2.config import CfgNode as CfgNode_
 from detectron2.config import configurable
-
+from detectron2.structures import Instances
+from detectron2.utils.registry import Registry
 
 TRACKER_HEADS_REGISTRY = Registry("TRACKER_HEADS")
 TRACKER_HEADS_REGISTRY.__doc__ = """
@@ -16,6 +15,7 @@ class BaseTracker(object):
     """
     A parent class for all trackers
     """
+
     @configurable
     def __init__(self, **kwargs):
         self._prev_instances = None  # (D2)instances for previous frame

diff --git a/projects/DeepLab/train_net.py b/projects/DeepLab/train_net.py
@@ -66,9 +66,6 @@ def build_evaluator(cls, cfg, dataset_name, output_folder=None):
                 output_dir=output_folder,
             )
         if evaluator_type == "cityscapes_sem_seg":
-            assert (
-                torch.cuda.device_count() > comm.get_rank()
-            ), "CityscapesEvaluator currently do not work with multiple machines."
             return CityscapesSemSegEvaluator(dataset_name)
         if len(evaluator_list) == 0:
             raise NotImplementedError(

diff --git a/projects/Panoptic-DeepLab/train_net.py b/projects/Panoptic-DeepLab/train_net.py
@@ -68,9 +68,6 @@ def build_evaluator(cls, cfg, dataset_name, output_folder=None):
         if evaluator_type in ["cityscapes_panoptic_seg", "coco_panoptic_seg"]:
             evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
         if evaluator_type == "cityscapes_panoptic_seg":
-            assert (
-                torch.cuda.device_count() > comm.get_rank()
-            ), "CityscapesEvaluator currently do not work with multiple machines."
             evaluator_list.append(CityscapesSemSegEvaluator(dataset_name))
             evaluator_list.append(CityscapesInstanceEvaluator(dataset_name))
         if evaluator_type == "coco_panoptic_seg":

diff --git a/projects/PointRend/train_net.py b/projects/PointRend/train_net.py
@@ -80,14 +80,8 @@ def build_evaluator(cls, cfg, dataset_name, output_folder=None):
                 output_dir=output_folder,
             )
         if evaluator_type == "cityscapes_instance":
-            assert (
-                torch.cuda.device_count() > comm.get_rank()
-            ), "CityscapesEvaluator currently do not work with multiple machines."
             return CityscapesInstanceEvaluator(dataset_name)
         if evaluator_type == "cityscapes_sem_seg":
-            assert (
-                torch.cuda.device_count() > comm.get_rank()
-            ), "CityscapesEvaluator currently do not work with multiple machines."
             return CityscapesSemSegEvaluator(dataset_name)
         if len(evaluator_list) == 0:
             raise NotImplementedError(

diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
@@ -1,14 +1,14 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 
 import math
-from unittest import TestCase
-
 import numpy as np
+from unittest import TestCase
 import torch
-from detectron2.solver import LRMultiplier, WarmupParamScheduler, build_lr_scheduler
 from fvcore.common.param_scheduler import CosineParamScheduler, MultiStepParamScheduler
 from torch import nn
 
+from detectron2.solver import LRMultiplier, WarmupParamScheduler, build_lr_scheduler
+
 
 class TestScheduler(TestCase):
     def test_warmup_multistep(self):
@@ -92,24 +92,28 @@ def _test_end_value(cfg_dict):
 
             self.assertAlmostEqual(lrs[-1], cfg.SOLVER.BASE_LR_END)
 
-        _test_end_value({
-            "SOLVER": {
-                "LR_SCHEDULER_NAME": "WarmupCosineLR",
-                "MAX_ITER": 100,
-                "WARMUP_ITERS": 10,
-                "WARMUP_FACTOR": 0.1,
-                "BASE_LR": 5.0,
-                "BASE_LR_END": 0.0,
+        _test_end_value(
+            {
+                "SOLVER": {
+                    "LR_SCHEDULER_NAME": "WarmupCosineLR",
+                    "MAX_ITER": 100,
+                    "WARMUP_ITERS": 10,
+                    "WARMUP_FACTOR": 0.1,
+                    "BASE_LR": 5.0,
+                    "BASE_LR_END": 0.0,
+                }
             }
-        })
-
-        _test_end_value({
-            "SOLVER": {
-                "LR_SCHEDULER_NAME": "WarmupCosineLR",
-                "MAX_ITER": 100,
-                "WARMUP_ITERS": 10,
-                "WARMUP_FACTOR": 0.1,
-                "BASE_LR": 5.0,
-                "BASE_LR_END": 0.5,
+        )
+
+        _test_end_value(
+            {
+                "SOLVER": {
+                    "LR_SCHEDULER_NAME": "WarmupCosineLR",
+                    "MAX_ITER": 100,
+                    "WARMUP_ITERS": 10,
+                    "WARMUP_FACTOR": 0.1,
+                    "BASE_LR": 5.0,
+                    "BASE_LR_END": 0.5,
+                }
             }
-        })
+        )
diff --git a/tools/plain_train_net.py b/tools/plain_train_net.py
@@ -77,14 +77,8 @@ def get_evaluator(cfg, dataset_name, output_folder=None):
     if evaluator_type == "coco_panoptic_seg":
         evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
     if evaluator_type == "cityscapes_instance":
-        assert (
-            torch.cuda.device_count() > comm.get_rank()
-        ), "CityscapesEvaluator currently do not work with multiple machines."
         return CityscapesInstanceEvaluator(dataset_name)
     if evaluator_type == "cityscapes_sem_seg":
-        assert (
-            torch.cuda.device_count() > comm.get_rank()
-        ), "CityscapesEvaluator currently do not work with multiple machines."
         return CityscapesSemSegEvaluator(dataset_name)
     if evaluator_type == "pascal_voc":
         return PascalVOCDetectionEvaluator(dataset_name)

diff --git a/tools/train_net.py b/tools/train_net.py
@@ -64,14 +64,8 @@ def build_evaluator(cfg, dataset_name, output_folder=None):
     if evaluator_type == "coco_panoptic_seg":
         evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
     if evaluator_type == "cityscapes_instance":
-        assert (
-            torch.cuda.device_count() > comm.get_rank()
-        ), "CityscapesEvaluator currently do not work with multiple machines."
         return CityscapesInstanceEvaluator(dataset_name)
     if evaluator_type == "cityscapes_sem_seg":
-        assert (
-            torch.cuda.device_count() > comm.get_rank()
-        ), "CityscapesEvaluator currently do not work with multiple machines."
         return CityscapesSemSegEvaluator(dataset_name)
     elif evaluator_type == "pascal_voc":
         return PascalVOCDetectionEvaluator(dataset_name)