[Enhancement] Support LayerScale #2451

Merged (16 commits) on Dec 11, 2022
2 changes: 1 addition & 1 deletion mmcv/cnn/bricks/__init__.py
@@ -28,5 +28,5 @@
'Scale', 'ConvAWS2d', 'ConvWS2d', 'conv_ws_2d',
'DepthwiseSeparableConvModule', 'Swish', 'Linear', 'Conv2dAdaptivePadding',
'Conv2d', 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d',
'Conv3d', 'Dropout', 'DropPath'
'Conv3d', 'Dropout', 'DropPath', 'LayerScale'
]
41 changes: 41 additions & 0 deletions mmcv/cnn/bricks/transformer.py
@@ -551,6 +551,38 @@ def forward(self,
return identity + self.dropout_layer(self.proj_drop(out))


class LayerScale(nn.Module):
"""LayerScale layer.

    Args:
        dim (int): Dimension of input features.
        inplace (bool): Whether to perform the operation in-place.
            Default: `False`.
        data_format (str): The input data format, which can be
            'channels_last' or 'channels_first', representing (B, N, C)
            and (B, C, H, W) format data respectively.
            Default: 'channels_last'.
    """

def __init__(self,
dim: int,
inplace: bool = False,
data_format: str = 'channels_last'):
super().__init__()
assert data_format in ('channels_last', 'channels_first'), \
"'data_format' could only be channels_last or channels_first."
self.inplace = inplace
self.data_format = data_format
self.weight = nn.Parameter(torch.ones(dim) * 1e-5)

def forward(self, x):
if self.data_format == 'channels_first':
if self.inplace:
return x.mul_(self.weight.view(-1, 1, 1))
else:
return x * self.weight.view(-1, 1, 1)
return x.mul_(self.weight) if self.inplace else x * self.weight
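A minimal usage sketch of the class above, relying only on the interface in this diff (tensor shapes are illustrative):

    import torch
    from mmcv.cnn.bricks.transformer import LayerScale

    # channels_last (default): (B, N, C) token sequences
    ls = LayerScale(dim=256)
    out = ls(torch.randn(2, 196, 256))  # each channel scaled by a learnable weight initialized to 1e-5

    # channels_first: (B, C, H, W) feature maps; the weight is broadcast via view(-1, 1, 1)
    ls_cf = LayerScale(dim=64, data_format='channels_first')
    out_cf = ls_cf(torch.randn(2, 64, 56, 56))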


@MODELS.register_module()
class FFN(BaseModule):
"""Implements feed-forward networks (FFNs) with identity connection.
@@ -568,6 +600,8 @@ class FFN(BaseModule):
zeroed in FFN. Default 0.0.
add_identity (bool, optional): Whether to add the
identity connection. Default: `True`.
use_layer_scale (bool): Whether to use layer_scale in FFN.
Default: `True`.
dropout_layer (obj:`ConfigDict`): The dropout_layer used
when adding the shortcut.
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
@@ -588,6 +622,7 @@ def __init__(self,
ffn_drop=0.,
dropout_layer=None,
add_identity=True,
use_layer_scale=True,
init_cfg=None,
**kwargs):
super().__init__(init_cfg)
@@ -614,13 +649,19 @@ def __init__(self,
dropout_layer) if dropout_layer else torch.nn.Identity()
self.add_identity = add_identity

if use_layer_scale:
self.gamma2 = LayerScale(embed_dims)
else:
self.gamma2 = nn.Identity()

@deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
def forward(self, x, identity=None):
"""Forward function for `FFN`.

The function adds x to the output tensor if `identity` is None.
"""
out = self.layers(x)
out = self.gamma2(out)
if not self.add_identity:
return self.dropout_layer(out)
if identity is None:
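With the FFN changes above, LayerScale is enabled through a constructor flag; a short sketch of the intended call pattern (argument values are illustrative, not taken from the PR):

    import torch
    from mmcv.cnn.bricks.transformer import FFN

    # gamma2 is a LayerScale module when use_layer_scale=True, otherwise nn.Identity
    ffn = FFN(embed_dims=256, feedforward_channels=1024, use_layer_scale=True)
    x = torch.rand(2, 20, 256)
    out = ffn(x)  # layers -> gamma2 -> dropout_layer, plus the identity connection; shape (2, 20, 256)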
49 changes: 47 additions & 2 deletions tests/test_cnn/test_transformer.py
@@ -7,7 +7,7 @@

from mmcv.cnn.bricks.drop import DropPath
from mmcv.cnn.bricks.transformer import (FFN, AdaptivePadding,
BaseTransformerLayer,
BaseTransformerLayer, LayerScale,
MultiheadAttention, PatchEmbed,
PatchMerging,
TransformerLayerSequence)
@@ -538,7 +538,6 @@ def test_ffn():
with pytest.raises(AssertionError):
# num_fcs should be no less than 2
FFN(num_fcs=1)
FFN(dropout=0, add_residual=True)
ffn = FFN(dropout=0, add_identity=True)

input_tensor = torch.rand(2, 20, 256)
@@ -553,6 +552,52 @@ def test_ffn():
ffn(input_tensor, identity=residual).sum(),
ffn(input_tensor).sum() + residual.sum() - input_tensor.sum())

# test with layer_scale
ffn = FFN(dropout=0, add_identity=True, use_layer_scale=True)

input_tensor = torch.rand(2, 20, 256)
input_tensor_nbc = input_tensor.transpose(0, 1)
assert torch.allclose(ffn(input_tensor).sum(), ffn(input_tensor_nbc).sum())


def test_layer_scale():
with pytest.raises(AssertionError):
cfg = dict(
dim=10,
data_format='BNC',
)
LayerScale(**cfg)

# test init
cfg = dict(dim=10)
ls = LayerScale(**cfg)
assert torch.equal(ls.weight, torch.ones(10, requires_grad=True) * 1e-5)

# test forward
# test channels_last
cfg = dict(dim=256, inplace=False, data_format='channels_last')
ls_channels_last = LayerScale(**cfg)
x = torch.randn((4, 49, 256))
out = ls_channels_last(x)
assert tuple(out.size()) == (4, 49, 256)
assert torch.equal(x * 1e-5, out)

# test channels_first
cfg = dict(dim=256, inplace=False, data_format='channels_first')
ls_channels_first = LayerScale(**cfg)
x = torch.randn((4, 256, 7, 7))
out = ls_channels_first(x)
assert tuple(out.size()) == (4, 256, 7, 7)
assert torch.equal(x * 1e-5, out)

# test inplace True
cfg = dict(dim=256, inplace=True, data_format='channels_first')
ls_channels_first = LayerScale(**cfg)
x = torch.randn((4, 256, 7, 7))
out = ls_channels_first(x)
assert tuple(out.size()) == (4, 256, 7, 7)
assert x is out


@pytest.mark.skipif(not torch.cuda.is_available(), reason='Cuda not available')
def test_basetransformerlayer_cuda():