From c1f46a69f41da7a4cf7c59c4fd4d41501b68513b Mon Sep 17 00:00:00 2001
From: John Zhu <31381602+johnzja@users.noreply.github.com>
Date: Tue, 18 Aug 2020 23:33:05 +0800
Subject: [PATCH] Fast-SCNN implemented (#58)

* init commit: fast_scnn
* 247917iters
* 4x8_80k
* configs placed in configs_unify. 4x8_80k exp.running.
* mmseg/utils/collect_env.py modified to support Windows
* study on lr
* bug in configs_unify/***/cityscapes.py fixed.
* lr0.08_100k
* lr_power changed to 1.2
* log_config by_epoch set to False.
* lr1.2
* doc strings added
* add fast_scnn backbone test
* 80k 0.08,0.12
* add 450k
* fast_scnn test: fix BN bug.
* Add different config files into configs/
* .gitignore recovered.
* configs_unify del
* .gitignore recovered.
* delete sub-optimal config files of fast-scnn
* Code style improved.
* add docstrings to component modules of fast-scnn
* relevant files modified according to Jerry's instructions
* relevant files modified according to Jerry's instructions
* lint problems fixed.
* fast_scnn config extremely simplified.
* InvertedResidual
* fixed padding problems
* add unit test for inverted_residual
* add unit test for inverted_residual: debug 0
* add unit test for inverted_residual: debug 1
* add unit test for inverted_residual: debug 2
* add unit test for inverted_residual: debug 3
* add unit test for sep_fcn_head: debug 0
* add unit test for sep_fcn_head: debug 1
* add unit test for sep_fcn_head: debug 2
* add unit test for sep_fcn_head: debug 3
* add unit test for sep_fcn_head: debug 4
* add unit test for sep_fcn_head: debug 5
* FastSCNN type(dwchannels) changed to tuple.
* t changed to expand_ratio.
* Spaces fixed.
* Update mmseg/models/backbones/fast_scnn.py

Co-authored-by: Jerry Jiarui XU

* Update mmseg/models/decode_heads/sep_fcn_head.py

Co-authored-by: Jerry Jiarui XU

* Update mmseg/models/decode_heads/sep_fcn_head.py

Co-authored-by: Jerry Jiarui XU

* Docstrings fixed.
* Docstrings fixed.
* Inverted Residual kept coherent with mmcl.
* Inverted Residual kept coherent with mmcl. Debug 0
* _make_layer parameters renamed.
* final commit
* Arg scale_factor deleted.
* Expand_ratio docstrings updated.
* final commit
* Readme for Fast-SCNN added.
* model-zoo.md modified.
* fast_scnn README updated.
* Move InvertedResidual module into mmseg/utils.
* test_inverted_residual module corrected.
* test_inverted_residual.py moved.
* encoder_decoder modified to avoid bugs when running PSPNet.
  getting_started.md bug fixed.
* Revert "encoder_decoder modified to avoid bugs when running PSPNet."

This reverts commit dd0aadfb

Co-authored-by: Jerry Jiarui XU
---
 configs/_base_/models/fast_scnn.py            |  58 +++
 configs/fastscnn/README.md                    |  18 +
 .../fast_scnn_4x8_80k_lr0.12_cityscapes.py    |  10 +
 docs/getting_started.md                       |   2 +-
 docs/model_zoo.md                             |   5 +-
 mmseg/models/backbones/__init__.py            |   6 +-
 mmseg/models/backbones/fast_scnn.py           | 385 ++++++++++++++++++
 mmseg/models/decode_heads/__init__.py         |   3 +-
 mmseg/models/decode_heads/fcn_head.py         |   1 +
 mmseg/models/decode_heads/sep_fcn_head.py     |  50 +++
 mmseg/utils/__init__.py                       |   6 +-
 mmseg/utils/inverted_residual_module.py       |  73 ++++
 tests/test_models/test_backbone.py            |  32 +-
 tests/test_models/test_heads.py               |  37 +-
 .../test_inverted_residual_module.py          |  40 ++
 15 files changed, 714 insertions(+), 12 deletions(-)
 create mode 100644 configs/_base_/models/fast_scnn.py
 create mode 100644 configs/fastscnn/README.md
 create mode 100644 configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py
 create mode 100644 mmseg/models/backbones/fast_scnn.py
 create mode 100644 mmseg/models/decode_heads/sep_fcn_head.py
 create mode 100644 mmseg/utils/inverted_residual_module.py
 create mode 100644 tests/test_utils/test_inverted_residual_module.py

diff --git a/configs/_base_/models/fast_scnn.py b/configs/_base_/models/fast_scnn.py
new file mode 100644
index 0000000..67ee0d3
--- /dev/null
+++ b/configs/_base_/models/fast_scnn.py
@@ -0,0 +1,58 @@
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='FastSCNN',
+        downsample_dw_channels=(32, 48),
+        global_in_channels=64,
+        global_block_channels=(64, 96, 128),
+        global_block_strides=(2, 2, 1),
+        global_out_channels=128,
+        higher_in_channels=64,
+        lower_in_channels=128,
+        fusion_out_channels=128,
+        out_indices=(0, 1, 2),
+        norm_cfg=norm_cfg,
+        align_corners=False),
+    decode_head=dict(
+        type='DepthwiseSeparableFCNHead',
+        in_channels=128,
+        channels=128,
+        concat_input=False,
+        num_classes=19,
+        in_index=-1,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.)),
+    auxiliary_head=[
+        dict(
+            type='FCNHead',
+            in_channels=128,
+            channels=32,
+            num_convs=1,
+            num_classes=19,
+            in_index=-2,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+        dict(
+            type='FCNHead',
+            in_channels=64,
+            channels=32,
+            num_convs=1,
+            num_classes=19,
+            in_index=-3,
+            norm_cfg=norm_cfg,
+            concat_input=False,
+            align_corners=False,
+            loss_decode=dict(
+                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    ])

+# model training and testing settings
+train_cfg = dict()
+test_cfg = dict(mode='whole')
diff --git a/configs/fastscnn/README.md b/configs/fastscnn/README.md
new file mode 100644
index 0000000..dbfc0e4
--- /dev/null
+++ b/configs/fastscnn/README.md
@@ -0,0 +1,18 @@
+# Fast-SCNN for Semantic Segmentation
+
+## Introduction
+```
+@article{poudel2019fast,
+  title={Fast-scnn: Fast semantic segmentation network},
+  author={Poudel, Rudra PK and Liwicki, Stephan and Cipolla, Roberto},
+  journal={arXiv preprint arXiv:1902.04502},
+  year={2019}
+}
+```
+
+## Results and models
+
+### Cityscapes
+| Method    | Backbone  | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU  | mIoU(ms+flip) | download |
+|-----------|-----------|-----------|--------:|----------|----------------|------:|---------------|----------|
+| Fast-SCNN | Fast-SCNN | 512x1024  | 80000   | 8.4      | 63.61          | 69.06 | -             | [model](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-cae6c46a.pth) &#124; [log](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_4x8_80k_lr0.12_cityscapes-20200807_165744.log.json) |
diff --git a/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py b/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py
new file mode 100644
index 0000000..53fcfc4
--- /dev/null
+++ b/configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py
@@ -0,0 +1,10 @@
+_base_ = [
+    '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py',
+    '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
+]
+
+# Re-config the data sampler.
+data = dict(samples_per_gpu=8, workers_per_gpu=4)
+
+# Re-config the optimizer.
+optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5)
diff --git a/docs/getting_started.md b/docs/getting_started.md
index bf49802..a4ab035 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -338,7 +338,7 @@ The final output filename will be `psp_r50_512x1024_40ki_cityscapes-{hash id}.pt
 We provide a script to convert model to [ONNX](https://github.com/onnx/onnx) format. The converted model could be visualized by tools like [Netron](https://github.com/lutzroeder/netron). Besides, we also support comparing the output results between Pytorch and ONNX model.
 
 ```shell
-python tools/pytorch2onnx.py ${CONFIG_FILE} --checkpoint ${CHECKPOINT_FILE} --output_file ${ONNX_FILE} [--shape ${INPUT_SHAPE} --verify]
+python tools/pytorch2onnx.py ${CONFIG_FILE} --checkpoint ${CHECKPOINT_FILE} --output-file ${ONNX_FILE} [--shape ${INPUT_SHAPE} --verify]
 ```
 
 **Note**: This tool is still experimental. Some customized operators are not supported for now.
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
index aa5c4eb..3919a49 100644
--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -81,11 +81,14 @@ Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/c
 
 Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details.
 
+### Fast-SCNN
+
+Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) for details.
+
 ### ResNeSt
 
 Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) for details.
 
-
 ### Mixed Precision (FP16) Training
 
 Please refer [Mixed Precision (FP16) Training](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fp16/README.md) for details.
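Editor's note: a minimal sketch of building the Fast-SCNN segmentor from the new config. `mmcv.Config` and `mmseg.models.build_segmentor` exist in this codebase; treat the exact call pattern as an assumption of the sketch rather than part of the patch:

```python
# Sketch: instantiate the model described by the new config file.
from mmcv import Config

from mmseg.models import build_segmentor

cfg = Config.fromfile('configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py')
model = build_segmentor(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
```

Training would then go through the usual `tools/train.py` entry point; the `4x8` in the config name refers to 4 GPUs with 8 samples per GPU, matching `samples_per_gpu=8` above.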
diff --git a/mmseg/models/backbones/__init__.py b/mmseg/models/backbones/__init__.py
index 3592424..0cb2ec1 100644
--- a/mmseg/models/backbones/__init__.py
+++ b/mmseg/models/backbones/__init__.py
@@ -1,6 +1,10 @@
+from .fast_scnn import FastSCNN
 from .hrnet import HRNet
 from .resnest import ResNeSt
 from .resnet import ResNet, ResNetV1c, ResNetV1d
 from .resnext import ResNeXt
 
-__all__ = ['ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'ResNeSt']
+__all__ = [
+    'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
+    'ResNeSt'
+]
diff --git a/mmseg/models/backbones/fast_scnn.py b/mmseg/models/backbones/fast_scnn.py
new file mode 100644
index 0000000..d94f52c
--- /dev/null
+++ b/mmseg/models/backbones/fast_scnn.py
@@ -0,0 +1,385 @@
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule, constant_init, kaiming_init
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from mmseg.models.decode_heads.psp_head import PPM
+from mmseg.ops import DepthwiseSeparableConvModule, resize
+from mmseg.utils import InvertedResidual
+from ..builder import BACKBONES
+
+
+class LearningToDownsample(nn.Module):
+    """Learning to downsample module.
+
+    Args:
+        in_channels (int): Number of input channels.
+        dw_channels (tuple[int]): Number of output channels of the first and
+            the second depthwise separable conv (dsconv) layers.
+        out_channels (int): Number of output channels of the whole
+            'learning to downsample' module.
+        conv_cfg (dict | None): Config of conv layers. Default: None
+        norm_cfg (dict | None): Config of norm layers. Default:
+            dict(type='BN')
+        act_cfg (dict): Config of activation layers. Default:
+            dict(type='ReLU')
+    """
+
+    def __init__(self,
+                 in_channels,
+                 dw_channels,
+                 out_channels,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU')):
+        super(LearningToDownsample, self).__init__()
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        dw_channels1 = dw_channels[0]
+        dw_channels2 = dw_channels[1]
+
+        self.conv = ConvModule(
+            in_channels,
+            dw_channels1,
+            3,
+            stride=2,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.dsconv1 = DepthwiseSeparableConvModule(
+            dw_channels1,
+            dw_channels2,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg)
+        self.dsconv2 = DepthwiseSeparableConvModule(
+            dw_channels2,
+            out_channels,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            norm_cfg=self.norm_cfg)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.dsconv1(x)
+        x = self.dsconv2(x)
+        return x
+
+
+class GlobalFeatureExtractor(nn.Module):
+    """Global feature extractor module.
+
+    Args:
+        in_channels (int): Number of input channels of the GFE module.
+            Default: 64
+        block_channels (tuple[int]): Tuple of ints. Each int specifies the
+            number of output channels of each InvertedResidual module.
+            Default: (64, 96, 128)
+        out_channels (int): Number of output channels of the GFE module.
+            Default: 128
+        expand_ratio (int): Expansion factor for the hidden-layer channels
+            of each InvertedResidual module. Default: 6
+        num_blocks (tuple[int]): Tuple of ints. Each int specifies the
+            number of times each InvertedResidual module is repeated.
+            The repeated InvertedResidual modules are called a 'group'.
+            Default: (3, 3, 3)
+        strides (tuple[int]): Tuple of ints. Each int specifies
+            the downsampling factor of each 'group'.
+            Default: (2, 2, 1)
+        pool_scales (tuple[int]): Pooling scales used in the Pooling Pyramid
+            Module (PPM) applied after the last group.
+            Default: (1, 2, 3, 6)
+        conv_cfg (dict | None): Config of conv layers. Default: None
+        norm_cfg (dict | None): Config of norm layers. Default:
+            dict(type='BN')
+        act_cfg (dict): Config of activation layers. Default:
+            dict(type='ReLU')
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False
+    """
+
+    def __init__(self,
+                 in_channels=64,
+                 block_channels=(64, 96, 128),
+                 out_channels=128,
+                 expand_ratio=6,
+                 num_blocks=(3, 3, 3),
+                 strides=(2, 2, 1),
+                 pool_scales=(1, 2, 3, 6),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False):
+        super(GlobalFeatureExtractor, self).__init__()
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        assert len(block_channels) == len(num_blocks) == 3
+        self.bottleneck1 = self._make_layer(in_channels, block_channels[0],
+                                            num_blocks[0], strides[0],
+                                            expand_ratio)
+        self.bottleneck2 = self._make_layer(block_channels[0],
+                                            block_channels[1], num_blocks[1],
+                                            strides[1], expand_ratio)
+        self.bottleneck3 = self._make_layer(block_channels[1],
+                                            block_channels[2], num_blocks[2],
+                                            strides[2], expand_ratio)
+        self.ppm = PPM(
+            pool_scales,
+            block_channels[2],
+            block_channels[2] // 4,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=align_corners)
+        self.out = ConvModule(
+            block_channels[2] * 2,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+    def _make_layer(self,
+                    in_channels,
+                    out_channels,
+                    blocks,
+                    stride=1,
+                    expand_ratio=6):
+        layers = [
+            InvertedResidual(
+                in_channels,
+                out_channels,
+                stride,
+                expand_ratio,
+                norm_cfg=self.norm_cfg)
+        ]
+        for i in range(1, blocks):
+            layers.append(
+                InvertedResidual(
+                    out_channels,
+                    out_channels,
+                    1,
+                    expand_ratio,
+                    norm_cfg=self.norm_cfg))
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.bottleneck1(x)
+        x = self.bottleneck2(x)
+        x = self.bottleneck3(x)
+        x = torch.cat([x, *self.ppm(x)], dim=1)
+        x = self.out(x)
+        return x
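Editor's aside on the channel bookkeeping above (not part of the patch): `self.out` takes `block_channels[2] * 2` input channels because `forward` concatenates the bottleneck output with the four PPM branches. A small worked check under the defaults:

```python
# Worked example (illustration only): PPM returns one tensor per pool
# scale, each with block_channels[2] // 4 channels, and forward()
# concatenates them with x along the channel dimension.
c = 128                       # block_channels[2] (default)
ppm_channels = 4 * (c // 4)   # pool_scales=(1, 2, 3, 6) -> 4 branches
assert c + ppm_channels == c * 2   # 256, the in_channels of self.out
```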
+
+
+class FeatureFusionModule(nn.Module):
+    """Feature fusion module.
+
+    Args:
+        higher_in_channels (int): Number of input channels of the
+            higher-resolution branch.
+        lower_in_channels (int): Number of input channels of the
+            lower-resolution branch.
+        out_channels (int): Number of output channels.
+        scale_factor (int): Upsampling factor applied to the
+            lower-resolution input. Should be coherent with the
+            downsampling factor determined by the GFE module.
+        conv_cfg (dict | None): Config of conv layers. Default: None
+        norm_cfg (dict | None): Config of norm layers. Default:
+            dict(type='BN')
+        act_cfg (dict): Config of activation layers. Default:
+            dict(type='ReLU')
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False
+    """
+
+    def __init__(self,
+                 higher_in_channels,
+                 lower_in_channels,
+                 out_channels,
+                 scale_factor,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False):
+        super(FeatureFusionModule, self).__init__()
+        self.scale_factor = scale_factor
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+        self.dwconv = ConvModule(
+            lower_in_channels,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.conv_lower_res = ConvModule(
+            out_channels,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.conv_higher_res = ConvModule(
+            higher_in_channels,
+            out_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.relu = nn.ReLU(True)
+
+    def forward(self, higher_res_feature, lower_res_feature):
+        lower_res_feature = resize(
+            lower_res_feature,
+            scale_factor=self.scale_factor,
+            mode='bilinear',
+            align_corners=self.align_corners)
+        lower_res_feature = self.dwconv(lower_res_feature)
+        lower_res_feature = self.conv_lower_res(lower_res_feature)
+
+        higher_res_feature = self.conv_higher_res(higher_res_feature)
+        out = higher_res_feature + lower_res_feature
+        return self.relu(out)
+
+
+@BACKBONES.register_module()
+class FastSCNN(nn.Module):
+    """Fast-SCNN Backbone.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        downsample_dw_channels (tuple[int]): Number of output channels after
+            the first conv layer & the second conv layer in
+            the Learning-To-Downsample (LTD) module.
+            Default: (32, 48).
+        global_in_channels (int): Number of input channels of the
+            Global Feature Extractor (GFE).
+            Equal to the number of output channels of LTD.
+            Default: 64.
+        global_block_channels (tuple[int]): Tuple of integers that describe
+            the output channels for each of the MobileNet-v2 bottleneck
+            residual blocks in GFE.
+            Default: (64, 96, 128).
+        global_block_strides (tuple[int]): Tuple of integers
+            that describe the strides (downsampling factors) for each of the
+            MobileNet-v2 bottleneck residual blocks in GFE.
+            Default: (2, 2, 1).
+        global_out_channels (int): Number of output channels of GFE.
+            Default: 128.
+        higher_in_channels (int): Number of input channels of the higher
+            resolution branch in FFM.
+            Equal to global_in_channels.
+            Default: 64.
+        lower_in_channels (int): Number of input channels of the lower
+            resolution branch in FFM.
+            Equal to global_out_channels.
+            Default: 128.
+        fusion_out_channels (int): Number of output channels of FFM.
+            Default: 128.
+        out_indices (tuple): Tuple of indices of the list
+            [higher_res_features, lower_res_features, fusion_output].
+            Often set to (0, 1, 2) to enable the auxiliary heads.
+            Default: (0, 1, 2).
+        conv_cfg (dict | None): Config of conv layers. Default: None
+        norm_cfg (dict | None): Config of norm layers. Default:
+            dict(type='BN')
+        act_cfg (dict): Config of activation layers. Default:
+            dict(type='ReLU')
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False
+    """
+
+    def __init__(self,
+                 in_channels=3,
+                 downsample_dw_channels=(32, 48),
+                 global_in_channels=64,
+                 global_block_channels=(64, 96, 128),
+                 global_block_strides=(2, 2, 1),
+                 global_out_channels=128,
+                 higher_in_channels=64,
+                 lower_in_channels=128,
+                 fusion_out_channels=128,
+                 out_indices=(0, 1, 2),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 align_corners=False):
+
+        super(FastSCNN, self).__init__()
+        if global_in_channels != higher_in_channels:
+            raise AssertionError('Global Input Channels must be the same '
+                                 'with Higher Input Channels!')
+        elif global_out_channels != lower_in_channels:
+            raise AssertionError('Global Output Channels must be the same '
+                                 'with Lower Input Channels!')
+
+        # Calculate scale factor used in FFM.
+        self.scale_factor = 1
+        for factor in global_block_strides:
+            self.scale_factor *= factor
+
+        self.in_channels = in_channels
+        self.downsample_dw_channels1 = downsample_dw_channels[0]
+        self.downsample_dw_channels2 = downsample_dw_channels[1]
+        self.global_in_channels = global_in_channels
+        self.global_block_channels = global_block_channels
+        self.global_block_strides = global_block_strides
+        self.global_out_channels = global_out_channels
+        self.higher_in_channels = higher_in_channels
+        self.lower_in_channels = lower_in_channels
+        self.fusion_out_channels = fusion_out_channels
+        self.out_indices = out_indices
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        self.align_corners = align_corners
+        self.learning_to_downsample = LearningToDownsample(
+            in_channels,
+            downsample_dw_channels,
+            global_in_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.global_feature_extractor = GlobalFeatureExtractor(
+            global_in_channels,
+            global_block_channels,
+            global_out_channels,
+            strides=self.global_block_strides,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=self.align_corners)
+        self.feature_fusion = FeatureFusionModule(
+            higher_in_channels,
+            lower_in_channels,
+            fusion_out_channels,
+            scale_factor=self.scale_factor,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg,
+            align_corners=self.align_corners)
+
+    def init_weights(self, pretrained=None):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                kaiming_init(m)
+            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+                constant_init(m, 1)
+
+    def forward(self, x):
+        higher_res_features = self.learning_to_downsample(x)
+        lower_res_features = self.global_feature_extractor(
+            higher_res_features)
+        fusion_output = self.feature_fusion(higher_res_features,
+                                            lower_res_features)
+
+        outs = [higher_res_features, lower_res_features, fusion_output]
+        outs = [outs[i] for i in self.out_indices]
+        return tuple(outs)
diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py
index fda4309..a6ead50 100644
--- a/mmseg/models/decode_heads/__init__.py
+++ b/mmseg/models/decode_heads/__init__.py
@@ -10,10 +10,11 @@ from .psa_head import PSAHead
 from .psp_head import PSPHead
 from .sep_aspp_head import DepthwiseSeparableASPPHead
+from .sep_fcn_head import DepthwiseSeparableFCNHead
 from .uper_head import UPerHead
 
 __all__ = [
     'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
     'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
-    'EncHead'
+    'EncHead', 'DepthwiseSeparableFCNHead'
 ]
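Editor's note: a quick forward-shape sketch of the backbone as registered above. It mirrors the unit test added later in this patch; everything here follows from the defaults in the docstring:

```python
import torch

from mmseg.models.backbones import FastSCNN

model = FastSCNN()  # all defaults from the docstring above
model.init_weights()
feats = model(torch.randn(1, 3, 512, 1024))
# out_indices=(0, 1, 2): 1/8-resolution LTD output, 1/32-resolution GFE
# output, and the 1/8-resolution FFM output, in that order.
assert feats[0].shape == (1, 64, 64, 128)
assert feats[1].shape == (1, 128, 16, 32)
assert feats[2].shape == (1, 128, 64, 128)
```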
diff --git a/mmseg/models/decode_heads/fcn_head.py b/mmseg/models/decode_heads/fcn_head.py
index e586a2e..ff48b51 100644
--- a/mmseg/models/decode_heads/fcn_head.py
+++ b/mmseg/models/decode_heads/fcn_head.py
@@ -27,6 +27,7 @@ def __init__(self,
         assert num_convs > 0
         self.num_convs = num_convs
         self.concat_input = concat_input
+        self.kernel_size = kernel_size
         super(FCNHead, self).__init__(**kwargs)
         convs = []
         convs.append(
diff --git a/mmseg/models/decode_heads/sep_fcn_head.py b/mmseg/models/decode_heads/sep_fcn_head.py
new file mode 100644
index 0000000..1877951
--- /dev/null
+++ b/mmseg/models/decode_heads/sep_fcn_head.py
@@ -0,0 +1,50 @@
+from mmseg.ops import DepthwiseSeparableConvModule
+from ..builder import HEADS
+from .fcn_head import FCNHead
+
+
+@HEADS.register_module()
+class DepthwiseSeparableFCNHead(FCNHead):
+    """Depthwise-Separable Fully Convolutional Network for Semantic
+    Segmentation.
+
+    This head is implemented according to the Fast-SCNN paper.
+
+    Args:
+        in_channels (int): Number of output channels of FFM.
+        channels (int): Number of middle-stage channels in the decode head.
+        concat_input (bool): Whether to concatenate the original decode
+            input into the result of several consecutive convolution layers.
+            Default: True.
+        num_classes (int): Used to determine the channel dimension of the
+            final prediction tensor.
+        in_index (int): Corresponds to 'out_indices' of the FastSCNN
+            backbone.
+        norm_cfg (dict | None): Config of norm layers.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        loss_decode (dict): Config of loss type and some
+            relevant additional options.
+    """
+
+    def __init__(self, **kwargs):
+        super(DepthwiseSeparableFCNHead, self).__init__(**kwargs)
+        self.convs[0] = DepthwiseSeparableConvModule(
+            self.in_channels,
+            self.channels,
+            kernel_size=self.kernel_size,
+            padding=self.kernel_size // 2,
+            norm_cfg=self.norm_cfg)
+        for i in range(1, self.num_convs):
+            self.convs[i] = DepthwiseSeparableConvModule(
+                self.channels,
+                self.channels,
+                kernel_size=self.kernel_size,
+                padding=self.kernel_size // 2,
+                norm_cfg=self.norm_cfg)
+
+        if self.concat_input:
+            self.conv_cat = DepthwiseSeparableConvModule(
+                self.in_channels + self.channels,
+                self.channels,
+                kernel_size=self.kernel_size,
+                padding=self.kernel_size // 2,
+                norm_cfg=self.norm_cfg)
diff --git a/mmseg/utils/__init__.py b/mmseg/utils/__init__.py
index e7d2867..5b54059 100644
--- a/mmseg/utils/__init__.py
+++ b/mmseg/utils/__init__.py
@@ -1,7 +1,5 @@
 from .collect_env import collect_env
+from .inverted_residual_module import InvertedResidual
 from .logger import get_root_logger
 
-__all__ = [
-    'get_root_logger',
-    'collect_env',
-]
+__all__ = ['get_root_logger', 'collect_env', 'InvertedResidual']
diff --git a/mmseg/utils/inverted_residual_module.py b/mmseg/utils/inverted_residual_module.py
new file mode 100644
index 0000000..ff33a36
--- /dev/null
+++ b/mmseg/utils/inverted_residual_module.py
@@ -0,0 +1,73 @@
+from mmcv.cnn import ConvModule, build_norm_layer
+from torch import nn
+
+
+class InvertedResidual(nn.Module):
+    """Inverted residual module.
+
+    Args:
+        in_channels (int): The input channels of the InvertedResidual block.
+        out_channels (int): The output channels of the InvertedResidual
+            block.
+        stride (int): Stride of the middle (depthwise) 3x3 convolution.
+        expand_ratio (int): Expansion factor for the hidden-layer channels;
+            the hidden layer has round(in_channels * expand_ratio) channels.
+        dilation (int): Dilation rate of the depthwise convolution.
+            Default: 1.
+        conv_cfg (dict): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU6').
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 expand_ratio,
+                 dilation=1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU6')):
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        hidden_dim = int(round(in_channels * expand_ratio))
+        self.use_res_connect = self.stride == 1 \
+            and in_channels == out_channels
+
+        layers = []
+        if expand_ratio != 1:
+            # pw
+            layers.append(
+                ConvModule(
+                    in_channels,
+                    hidden_dim,
+                    kernel_size=1,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+        layers.extend([
+            # dw
+            ConvModule(
+                hidden_dim,
+                hidden_dim,
+                kernel_size=3,
+                padding=dilation,
+                stride=stride,
+                dilation=dilation,
+                groups=hidden_dim,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg),
+            # pw-linear
+            nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
+            build_norm_layer(norm_cfg, out_channels)[1],
+        ])
+        self.conv = nn.Sequential(*layers)
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
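Editor's note: a short sketch of the layer layout `InvertedResidual` builds. With `expand_ratio != 1` the sequence is pointwise expansion, depthwise conv, then a linear pointwise projection; with `expand_ratio == 1` the leading expansion is skipped. The assertions follow directly from the constructor above:

```python
import torch

from mmseg.utils import InvertedResidual

block = InvertedResidual(32, 64, stride=2, expand_ratio=6)
# pw ConvModule, dw ConvModule, pw-linear Conv2d, norm layer
assert len(block.conv) == 4
assert not block.use_res_connect  # stride != 1 and in != out channels
out = block(torch.randn(1, 32, 64, 64))
assert out.shape == (1, 64, 32, 32)
```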
diff --git a/tests/test_models/test_backbone.py b/tests/test_models/test_backbone.py
index ba6cdaa..3f654b7 100644
--- a/tests/test_models/test_backbone.py
+++ b/tests/test_models/test_backbone.py
@@ -4,7 +4,8 @@
 from mmcv.utils.parrots_wrapper import _BatchNorm
 from torch.nn.modules import AvgPool2d, GroupNorm
 
-from mmseg.models.backbones import ResNeSt, ResNet, ResNetV1d, ResNeXt
+from mmseg.models.backbones import (FastSCNN, ResNeSt, ResNet, ResNetV1d,
+                                    ResNeXt)
 from mmseg.models.backbones.resnest import Bottleneck as BottleneckS
 from mmseg.models.backbones.resnet import BasicBlock, Bottleneck
 from mmseg.models.backbones.resnext import Bottleneck as BottleneckX
@@ -48,7 +49,6 @@ def check_norm_state(modules, train_state):
 
 
 def test_resnet_basic_block():
-
     with pytest.raises(AssertionError):
         # Not implemented yet.
         dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)
@@ -98,7 +98,6 @@ def test_resnet_basic_block():
 
 
 def test_resnet_bottleneck():
-
     with pytest.raises(AssertionError):
         # Style must be in ['pytorch', 'caffe']
         Bottleneck(64, 64, style='tensorflow')
@@ -667,6 +666,33 @@ def test_resnext_backbone():
     assert feat[3].shape == torch.Size([1, 2048, 7, 7])
 
 
+def test_fastscnn_backbone():
+    with pytest.raises(AssertionError):
+        # Fast-SCNN channel constraints.
+        FastSCNN(
+            3, (32, 48),
+            64, (64, 96, 128), (2, 2, 1),
+            global_out_channels=127,
+            higher_in_channels=64,
+            lower_in_channels=128)
+
+    # Test FastSCNN Standard Forward
+    model = FastSCNN()
+    model.init_weights()
+    model.train()
+    batch_size = 4
+    imgs = torch.randn(batch_size, 3, 512, 1024)
+    feat = model(imgs)
+
+    assert len(feat) == 3
+    # higher-res
+    assert feat[0].shape == torch.Size([batch_size, 64, 64, 128])
+    # lower-res
+    assert feat[1].shape == torch.Size([batch_size, 128, 16, 32])
+    # FFM output
+    assert feat[2].shape == torch.Size([batch_size, 128, 64, 128])
+
+
 def test_resnest_bottleneck():
     with pytest.raises(AssertionError):
         # Style must be in ['pytorch', 'caffe']
diff --git a/tests/test_models/test_heads.py b/tests/test_models/test_heads.py
index 3ac6bb0..8feb0e6 100644
--- a/tests/test_models/test_heads.py
+++ b/tests/test_models/test_heads.py
@@ -6,7 +6,8 @@
 from mmcv.utils.parrots_wrapper import SyncBatchNorm
 
 from mmseg.models.decode_heads import (ANNHead, ASPPHead, CCHead, DAHead,
-                                       DepthwiseSeparableASPPHead, EncHead,
+                                       DepthwiseSeparableASPPHead,
+                                       DepthwiseSeparableFCNHead, EncHead,
                                        FCNHead, GCHead, NLHead, OCRHead,
                                        PSAHead, PSPHead, UPerHead)
 from mmseg.models.decode_heads.decode_head import BaseDecodeHead
@@ -539,3 +540,37 @@ def test_dw_aspp_head():
     assert head.aspp_modules[2].depthwise_conv.dilation == (24, 24)
     outputs = head(inputs)
     assert outputs.shape == (1, head.num_classes, 45, 45)
+
+
+def test_sep_fcn_head():
+    # test sep_fcn_head with concat_input=False
+    head = DepthwiseSeparableFCNHead(
+        in_channels=128,
+        channels=128,
+        concat_input=False,
+        num_classes=19,
+        in_index=-1,
+        norm_cfg=dict(type='BN', requires_grad=True, momentum=0.01))
+    x = [torch.rand(2, 128, 32, 32)]
+    output = head(x)
+    assert output.shape == (2, head.num_classes, 32, 32)
+    assert not head.concat_input
+    from mmseg.ops.separable_conv_module import DepthwiseSeparableConvModule
+    assert isinstance(head.convs[0], DepthwiseSeparableConvModule)
+    assert isinstance(head.convs[1], DepthwiseSeparableConvModule)
+    assert head.conv_seg.kernel_size == (1, 1)
+
+    head = DepthwiseSeparableFCNHead(
+        in_channels=64,
+        channels=64,
+        concat_input=True,
+        num_classes=19,
+        in_index=-1,
+        norm_cfg=dict(type='BN', requires_grad=True, momentum=0.01))
+    x = [torch.rand(3, 64, 32, 32)]
+    output = head(x)
+    assert output.shape == (3, head.num_classes, 32, 32)
+    assert head.concat_input
+    from mmseg.ops.separable_conv_module import DepthwiseSeparableConvModule
+    assert isinstance(head.convs[0], DepthwiseSeparableConvModule)
+    assert isinstance(head.convs[1], DepthwiseSeparableConvModule)
diff --git a/tests/test_utils/test_inverted_residual_module.py b/tests/test_utils/test_inverted_residual_module.py
new file mode 100644
index 0000000..827c105
--- /dev/null
+++ b/tests/test_utils/test_inverted_residual_module.py
@@ -0,0 +1,40 @@
+import pytest
+import torch
+
+from mmseg.utils import InvertedResidual
+
+
+def test_inv_residual():
+    with pytest.raises(AssertionError):
+        # test stride assertion.
+        InvertedResidual(32, 32, 3, 4)
+
+    # test default config with res connection.
+    # set expand_ratio = 4, stride = 1 and inp=oup.
+    inv_module = InvertedResidual(32, 32, 1, 4)
+    assert inv_module.use_res_connect
+    assert inv_module.conv[0].kernel_size == (1, 1)
+    assert inv_module.conv[0].padding == 0
+    assert inv_module.conv[1].kernel_size == (3, 3)
+    assert inv_module.conv[1].padding == 1
+    assert inv_module.conv[0].with_norm
+    assert inv_module.conv[1].with_norm
+    x = torch.rand(1, 32, 64, 64)
+    output = inv_module(x)
+    assert output.shape == (1, 32, 64, 64)
+
+    # test inv_residual module without res connection.
+    # set expand_ratio = 4, stride = 2.
+    inv_module = InvertedResidual(32, 32, 2, 4)
+    assert not inv_module.use_res_connect
+    assert inv_module.conv[0].kernel_size == (1, 1)
+    x = torch.rand(1, 32, 64, 64)
+    output = inv_module(x)
+    assert output.shape == (1, 32, 32, 32)
+
+    # test expand_ratio == 1
+    inv_module = InvertedResidual(32, 32, 1, 1)
+    assert inv_module.conv[0].kernel_size == (3, 3)
+    x = torch.rand(1, 32, 64, 64)
+    output = inv_module(x)
+    assert output.shape == (1, 32, 64, 64)
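Editor's note: closing the loop, a hedged end-to-end sketch (not part of the patch) wiring the new backbone to the new head the same way the base config does; the `norm_cfg` here is a simplification of the config's SyncBN setting:

```python
import torch

from mmseg.models.backbones import FastSCNN
from mmseg.models.decode_heads import DepthwiseSeparableFCNHead

backbone = FastSCNN()
head = DepthwiseSeparableFCNHead(
    in_channels=128,
    channels=128,
    concat_input=False,
    num_classes=19,
    in_index=-1,
    norm_cfg=dict(type='BN'))
feats = backbone(torch.randn(1, 3, 512, 1024))
logits = head(list(feats))  # in_index=-1 selects the FFM output
assert logits.shape == (1, 19, 64, 128)  # 1/8 of the input resolution
```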