diff --git a/docs/zh_cn/tutorials/optim_wrapper.md b/docs/zh_cn/tutorials/optim_wrapper.md
index f54468cf19..7f9ff79b44 100644
--- a/docs/zh_cn/tutorials/optim_wrapper.md
+++ b/docs/zh_cn/tutorials/optim_wrapper.md
@@ -338,17 +338,19 @@ optimizer = build_optim_wrapper(ToyModel(), optim_wrapper)
 
 `decay_mult`: weight decay multiplier for all parameters
 
-`bias_lr_mult`: learning rate multiplier for bias parameters (excluding the bias of normalization layers and the offset of deformable convolutions); defaults to 1
+`bias_lr_mult`: learning rate multiplier for bias parameters (excluding the bias of normalization layers and the offset of deformable convolutions)
 
-`bias_decay_mult`: weight decay multiplier for bias parameters (excluding the bias of normalization layers and the offset of deformable convolutions); defaults to 1
+`bias_decay_mult`: weight decay multiplier for bias parameters (excluding the bias of normalization layers and the offset of deformable convolutions)
 
-`norm_decay_mult`: weight decay multiplier for the weight and bias of normalization layers; defaults to 1
+`norm_decay_mult`: weight decay multiplier for the weight and bias of normalization layers
 
-`dwconv_decay_mult`: weight decay multiplier for depth-wise convolutions; defaults to 1
+`flat_decay_mult`: weight decay multiplier for one-dimensional parameters
+
+`dwconv_decay_mult`: weight decay multiplier for depth-wise convolutions
 
 `bypass_duplicate`: whether to skip duplicated parameters; defaults to `False`
 
-`dcn_offset_lr_mult`: learning rate multiplier for deformable convolutions (Deformable Convolution); defaults to 1
+`dcn_offset_lr_mult`: learning rate multiplier for deformable convolutions (Deformable Convolution)
 
 ### Set different hyperparameter multipliers for different parts of the model
 
diff --git a/mmengine/optim/optimizer/default_constructor.py b/mmengine/optim/optimizer/default_constructor.py
index 09ce17993c..e89cf53fb1 100644
--- a/mmengine/optim/optimizer/default_constructor.py
+++ b/mmengine/optim/optimizer/default_constructor.py
@@ -42,6 +42,8 @@ class DefaultOptimWrapperConstructor:
     - ``norm_decay_mult`` (float): It will be multiplied to the weight
       decay for all weight and bias parameters of normalization
       layers.
+    - ``flat_decay_mult`` (float): It will be multiplied to the weight
+      decay for all one-dimensional parameters.
     - ``dwconv_decay_mult`` (float): It will be multiplied to the weight
       decay for all weight and bias parameters of depthwise conv
       layers.
@@ -185,12 +187,13 @@ def add_params(self,
         # first sort with alphabet order and then sort with reversed len of str
         sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True)
 
-        bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.)
-        bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.)
-        norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.)
-        dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.)
+        bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', None)
+        bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', None)
+        norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', None)
+        dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', None)
+        flat_decay_mult = self.paramwise_cfg.get('flat_decay_mult', None)
         bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False)
-        dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.)
+        dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', None)
 
         # special rules for norm layers and depth-wise conv layers
         is_norm = isinstance(module,
@@ -226,10 +229,12 @@ def add_params(self,
             if not is_custom:
                 # bias_lr_mult affects all bias parameters
                 # except for norm.bias dcn.conv_offset.bias
-                if name == 'bias' and not (is_norm or is_dcn_module):
+                if name == 'bias' and not (
+                        is_norm or is_dcn_module) and bias_lr_mult is not None:
                     param_group['lr'] = self.base_lr * bias_lr_mult
 
                 if (prefix.find('conv_offset') != -1 and is_dcn_module
+                        and dcn_offset_lr_mult is not None
                         and isinstance(module, torch.nn.Conv2d)):
                     # deal with both dcn_offset's bias & weight
                     param_group['lr'] = self.base_lr * dcn_offset_lr_mult
@@ -237,18 +242,23 @@ def add_params(self,
             # apply weight decay policies
             if self.base_wd is not None:
                 # norm decay
-                if is_norm:
+                if is_norm and norm_decay_mult is not None:
                     param_group[
                         'weight_decay'] = self.base_wd * norm_decay_mult
+                # bias lr and decay
+                elif (name == 'bias' and not is_dcn_module
+                      and bias_decay_mult is not None):
+                    param_group[
+                        'weight_decay'] = self.base_wd * bias_decay_mult
                 # depth-wise conv
-                elif is_dwconv:
+                elif is_dwconv and dwconv_decay_mult is not None:
                     param_group[
                         'weight_decay'] = self.base_wd * dwconv_decay_mult
-                # bias lr and decay
-                elif name == 'bias' and not is_dcn_module:
-                    # TODO: current bias_decay_mult will have affect on DCN
+                # flatten parameters except dcn offset
+                elif (param.ndim == 1 and not is_dcn_module
+                      and flat_decay_mult is not None):
                     param_group[
-                        'weight_decay'] = self.base_wd * bias_decay_mult
+                        'weight_decay'] = self.base_wd * flat_decay_mult
             params.append(param_group)
             for key, value in param_group.items():
                 if key == 'params':
diff --git a/tests/test_optim/test_optimizer/test_optimizer.py b/tests/test_optim/test_optimizer/test_optimizer.py
index ddbda7e58d..d82849760c 100644
--- a/tests/test_optim/test_optimizer/test_optimizer.py
+++ b/tests/test_optim/test_optimizer/test_optimizer.py
@@ -123,6 +123,7 @@ def _check_sgd_optimizer(self,
                              norm_decay_mult=1,
                              dwconv_decay_mult=1,
                              dcn_offset_lr_mult=1,
+                             flat_decay_mult=1,
                              bypass_duplicate=False):
         param_groups = optimizer.param_groups
         assert isinstance(optimizer, torch.optim.SGD)
@@ -139,7 +140,7 @@ def _check_sgd_optimizer(self,
         # param1
         param1 = param_groups[0]
         assert param1['lr'] == self.base_lr
-        assert param1['weight_decay'] == self.base_wd
+        assert param1['weight_decay'] == self.base_wd * flat_decay_mult
         # conv1.weight
         conv1_weight = param_groups[1]
         assert conv1_weight['lr'] == self.base_lr
@@ -163,7 +164,7 @@ def _check_sgd_optimizer(self,
         # sub.param1
         sub_param1 = param_groups[6]
         assert sub_param1['lr'] == self.base_lr
-        assert sub_param1['weight_decay'] == self.base_wd
+        assert sub_param1['weight_decay'] == self.base_wd * flat_decay_mult
         # sub.conv1.weight
         sub_conv1_weight = param_groups[7]
         assert sub_conv1_weight['lr'] == self.base_lr
@@ -172,8 +173,7 @@ def _check_sgd_optimizer(self,
         # sub.conv1.bias
         sub_conv1_bias = param_groups[8]
         assert sub_conv1_bias['lr'] == self.base_lr * bias_lr_mult
-        assert sub_conv1_bias[
-            'weight_decay'] == self.base_wd * dwconv_decay_mult
+        assert sub_conv1_bias['weight_decay'] == self.base_wd * bias_decay_mult
         # sub.gn.weight
         sub_gn_weight = param_groups[9]
         assert sub_gn_weight['lr'] == self.base_lr
@@ -258,7 +258,8 @@ def test_build_default_optimizer_constructor(self):
             bias_decay_mult=0.5,
             norm_decay_mult=0,
             dwconv_decay_mult=0.1,
-            dcn_offset_lr_mult=0.1)
+            dcn_offset_lr_mult=0.1,
+            flat_decay_mult=0.3)
         optim_constructor_cfg = dict(
             type='DefaultOptimWrapperConstructor',
             optim_wrapper_cfg=optim_wrapper,
@@ -390,7 +391,8 @@ def test_default_optimizer_constructor_with_model_wrapper(self):
             bias_decay_mult=0.5,
             norm_decay_mult=0,
             dwconv_decay_mult=0.1,
-            dcn_offset_lr_mult=0.1)
+            dcn_offset_lr_mult=0.1,
+            flat_decay_mult=0.3)
         optim_constructor = DefaultOptimWrapperConstructor(
             optim_wrapper_cfg, paramwise_cfg)
         optim_wrapper = optim_constructor(model)
@@ -429,7 +431,8 @@ def test_default_optimizer_constructor_with_model_wrapper(self):
             bias_decay_mult=0.5,
             norm_decay_mult=0,
             dwconv_decay_mult=0.1,
-            dcn_offset_lr_mult=0.1)
+            dcn_offset_lr_mult=0.1,
+            flat_decay_mult=0.3)
         optim_constructor = DefaultOptimWrapperConstructor(
             optim_wrapper_cfg, paramwise_cfg)
         optim_wrapper = optim_constructor(model)
@@ -484,7 +487,8 @@ def test_default_optimizer_constructor_with_paramwise_cfg(self):
             bias_decay_mult=0.5,
             norm_decay_mult=0,
             dwconv_decay_mult=0.1,
-            dcn_offset_lr_mult=0.1)
+            dcn_offset_lr_mult=0.1,
+            flat_decay_mult=0.3)
         optim_constructor = DefaultOptimWrapperConstructor(
             optim_wrapper_cfg, paramwise_cfg)
         optim_wrapper = optim_constructor(self.model)
@@ -554,6 +558,7 @@ def test_default_optimizer_constructor_bypass_duplicate(self):
             norm_decay_mult=0,
             dwconv_decay_mult=0.1,
             dcn_offset_lr_mult=0.1,
+            flat_decay_mult=0.3,
             bypass_duplicate=True)
         optim_constructor = DefaultOptimWrapperConstructor(
             optim_wrapper_cfg, paramwise_cfg)
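
As a quick illustration of the behavior this patch introduces, the sketch below builds an optimizer wrapper the way the patched tutorial does (via `build_optim_wrapper`) and uses the new `flat_decay_mult` key. The `ToyModel` and the chosen multiplier values are illustrative assumptions, not part of the patch; the point is that with the new `None` defaults, only the keys explicitly listed in `paramwise_cfg` modify a parameter group, while omitted keys leave the optimizer's global settings untouched instead of silently multiplying by 1.

```python
import torch.nn as nn
from mmengine.optim import build_optim_wrapper


class ToyModel(nn.Module):
    """Illustrative stand-in: conv.weight is 4-D, while conv.bias and the
    BatchNorm weight/bias are one-dimensional parameters."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.bn = nn.BatchNorm2d(8)


optim_wrapper_cfg = dict(
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=1e-4),
    # Only the multipliers listed here take effect; e.g. bias_lr_mult is
    # omitted, so bias learning rates stay at the global lr.
    paramwise_cfg=dict(
        norm_decay_mult=0.,   # no weight decay on BatchNorm weight/bias
        flat_decay_mult=0.))  # no weight decay on other 1-D params (conv.bias)

optim_wrapper = build_optim_wrapper(ToyModel(), optim_wrapper_cfg)

# conv.weight keeps the global weight_decay; the 1-D parameters get 0.
for group in optim_wrapper.optimizer.param_groups:
    print(group['weight_decay'], [tuple(p.shape) for p in group['params']])
```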