Modify the unsqueeze dimension of input data in conv1d NCL and NLC format #38425

Merged (11 commits) on Feb 10, 2022
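In summary: Paddle lowers conv1d to conv2d by unsqueezing the input, the weight, and the conv attributes to 2-D. Before this PR the dummy spatial axis was appended after the length axis (NCL became NCL1, NLC became NL1C); after it, the dummy axis is inserted in front of the length axis (NCL becomes NC1L, NLC becomes N1LC), and padding, stride, and dilation gain a leading element for the dummy axis instead of a trailing one. A minimal NumPy sketch of the shape bookkeeping (illustrative only, with made-up sizes; not the Paddle source):

```python
import numpy as np

# Hedged sketch of the reshaping this PR switches to: conv1d is lowered to
# conv2d by inserting a dummy height axis of size 1 *before* the length axis.
x_ncl = np.zeros((8, 16, 100))          # N, C, L
x_nc1l = np.expand_dims(x_ncl, -2)      # N, C, 1, L  (new behavior: axis=-2)
# Old behavior appended the dummy axis instead: N, C, L, 1 (axis=-1).

x_nlc = np.zeros((8, 100, 16))          # N, L, C
x_n1lc = np.expand_dims(x_nlc, -3)      # N, 1, L, C  (new behavior: axis=-3)

# The 1-D weight [out, in, k] becomes [out, in, 1, k] (kernel height 1),
# and stride/dilation/padding get a leading element for the dummy axis:
stride_1d, dilation_1d, padding_1d = [2], [1], [3]
stride_2d = [1] + stride_1d             # [1, 2]
dilation_2d = [1] + dilation_1d         # [1, 1]
padding_2d = [0] + padding_1d           # [0, 3]
```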
45 changes: 24 additions & 21 deletions paddle/fluid/operators/conv_cudnn_op.cu
@@ -638,7 +638,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     cudnnConvolutionBwdFilterAlgo_t filter_algo =
         static_cast<cudnnConvolutionBwdFilterAlgo_t>(0);
 #endif
-    size_t workspace_size = 0;
+    // input data workspace_size
+    size_t workspace_size_d = 0;
+    // weight workspace_size
+    size_t workspace_size_w = 0;
     int iwo_groups = groups;
     int c_groups = 1;
 
@@ -661,16 +664,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 
 #ifdef PADDLE_WITH_HIP
       using search1 = SearchAlgorithm<miopenConvBwdDataAlgorithm_t>;
-      workspace_size =
-          std::max(workspace_size, search1::GetWorkspaceSize(args1));
+      workspace_size_d =
+          std::max(workspace_size_d, search1::GetWorkspaceSize(args1));
       data_algo = search1::Find<T>(args1, exhaustive_search, deterministic,
-                                   workspace_size, ctx);
+                                   workspace_size_d, ctx);
 #else
       using search1 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
       data_algo =
           search1::Find<T>(args1, exhaustive_search, deterministic, ctx);
-      workspace_size =
-          std::max(workspace_size, search1::GetWorkspaceSize(args1, data_algo));
+      workspace_size_d = std::max(workspace_size_d,
+                                  search1::GetWorkspaceSize(args1, data_algo));
 #endif
     }
 
@@ -686,16 +689,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                                  platform::AllowTF32Cudnn(), c_groups);
 #ifdef PADDLE_WITH_HIP
       using search2 = SearchAlgorithm<miopenConvBwdWeightsAlgorithm_t>;
-      workspace_size =
-          std::max(workspace_size, search2::GetWorkspaceSize(args2));
+      workspace_size_w =
+          std::max(workspace_size_w, search2::GetWorkspaceSize(args2));
       filter_algo = search2::Find<T>(args2, exhaustive_search, deterministic,
-                                     workspace_size, ctx);
+                                     workspace_size_w, ctx);
 #else
       using search2 = SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>;
       filter_algo =
           search2::Find<T>(args2, exhaustive_search, deterministic, ctx);
-      workspace_size = std::max(workspace_size,
-                                search2::GetWorkspaceSize(args2, filter_algo));
+      workspace_size_w = std::max(
+          workspace_size_w, search2::GetWorkspaceSize(args2, filter_algo));
 #endif
     }
 
@@ -726,9 +729,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                       handle, &alpha, args1.odesc.desc(), output_grad_data,
                       args1.wdesc.desc(), filter_data, args1.cdesc.desc(),
                       data_algo, &beta, args1.idesc.desc(), temp_tensor_data,
-                      cudnn_workspace_ptr, workspace_size));
+                      cudnn_workspace_ptr, workspace_size_d));
             },
-            workspace_size);
+            workspace_size_d);
         PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::miopenOpTensor(
             handle, miopenTensorOpAdd, &alpha, args1.idesc.desc(),
             transformed_input_grad_data, &alpha, args1.idesc.desc(),
@@ -743,9 +746,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                       args1.wdesc.desc(), filter_data, args1.cdesc.desc(),
                       data_algo, &beta, args1.idesc.desc(),
                       transformed_input_grad_data, cudnn_workspace_ptr,
-                      workspace_size));
+                      workspace_size_d));
             },
-            workspace_size);
+            workspace_size_d);
       }
 
 #else
@@ -758,10 +761,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                       filter_data + i * group_offset_filter, args1.odesc.desc(),
                       output_grad_data + i * group_offset_out,
                       args1.cdesc.desc(), data_algo, cudnn_workspace_ptr,
-                      workspace_size, &beta, args1.idesc.desc(),
+                      workspace_size_d, &beta, args1.idesc.desc(),
                       transformed_input_grad_data + i * group_offset_in));
             },
-            workspace_size);
+            workspace_size_d);
       }
 #endif
     if (!is_sys_pad) {
@@ -804,9 +807,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                     handle, &alpha, args2.odesc.desc(), output_grad_data,
                     args2.idesc.desc(), input_data, args2.cdesc.desc(),
                     filter_algo, &beta, args2.wdesc.desc(), filter_grad_data,
-                    cudnn_workspace_ptr, workspace_size));
+                    cudnn_workspace_ptr, workspace_size_w));
           },
-          workspace_size);
+          workspace_size_w);
 #else
       for (int i = 0; i < groups; i++) {
         workspace_handle.RunFunc(
@@ -817,10 +820,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                     input_data + i * group_offset_in, args2.odesc.desc(),
                     output_grad_data + i * group_offset_out,
                     args2.cdesc.desc(), filter_algo, cudnn_workspace_ptr,
-                    workspace_size, &beta_filter, args2.wdesc.desc(),
+                    workspace_size_w, &beta_filter, args2.wdesc.desc(),
                     filter_grad_data + i * group_offset_filter));
             },
-            workspace_size);
+            workspace_size_w);
       }
 #endif
 
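The substantive change in this file is that the single workspace_size shared by the backward-data and backward-filter paths is split into workspace_size_d (input gradient) and workspace_size_w (weight gradient), so each algorithm search and each kernel launch is sized for its own pass rather than for the running maximum of both. A hedged Python sketch of that control-flow change (illustrative pseudocode only; find_algo and get_workspace_size are hypothetical stand-ins for the SearchAlgorithm helpers):

```python
# Illustrative pseudocode only: find_algo / get_workspace_size stand in for
# SearchAlgorithm<...>::Find / ::GetWorkspaceSize and are hypothetical.

def plan_backward_old(args1, args2, get_workspace_size, find_algo):
    ws = 0  # one shared bound grew across both searches
    ws = max(ws, get_workspace_size(args1))
    data_algo = find_algo(args1, ws)
    ws = max(ws, get_workspace_size(args2))  # filter search saw args1's size too
    filter_algo = find_algo(args2, ws)
    return data_algo, filter_algo, ws

def plan_backward_new(args1, args2, get_workspace_size, find_algo):
    ws_d = max(0, get_workspace_size(args1))  # input-gradient workspace only
    data_algo = find_algo(args1, ws_d)
    ws_w = max(0, get_workspace_size(args2))  # weight-gradient workspace only
    filter_algo = find_algo(args2, ws_w)
    return data_algo, filter_algo, ws_d, ws_w
```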
15 changes: 15 additions & 0 deletions python/paddle/fluid/tests/unittests/test_conv1d_layer.py
@@ -165,11 +165,20 @@ def add_cases(suite):
     suite.addTest(
         Conv1DTestCase(
             methodName='runTest', filter_size=3, padding='valid'))
+    suite.addTest(
+        Conv1DTestCase(
+            methodName='runTest', num_filters=512, padding='valid'))
+    suite.addTest(
+        Conv1DTestCase(
+            methodName='runTest', num_filters=512, padding=[1, 2]))
+    suite.addTest(
+        Conv1DTestCase(
+            methodName='runTest', padding=2, data_format='NLC'))
     suite.addTest(Conv1DTestCase(methodName='runTest', padding=[1]))
     suite.addTest(Conv1DTestCase(methodName='runTest', padding=[1, 2]))
     suite.addTest(
         Conv1DTestCase(
             methodName='runTest', padding=[1, 2], data_format='NLC'))
     suite.addTest(Conv1DTestCase(methodName='runTest', padding=2))
     suite.addTest(Conv1DTestCase(methodName='runTest'))
     suite.addTest(
@@ -204,6 +213,12 @@ def add_error_cases(suite):
     suite.addTest(
         Conv1DErrorTestCase(
             methodName='runTest', padding=[1, 2, 3, 4, 5]))
+    suite.addTest(
+        Conv1DErrorTestCase(
+            methodName='runTest', padding=[1, 2, 3, 4, 5], data_format='NLC'))
+    suite.addTest(
+        Conv1DErrorTestCase(
+            methodName='runTest', num_filters=512, padding=[1, 2, 3, 4, 5]))
     suite.addTest(Conv1DErrorTestCase(methodName='runTest', dilation=-10))
 
21 changes: 12 additions & 9 deletions python/paddle/nn/functional/conv.py
@@ -326,21 +326,24 @@ def conv1d(x,
 
     # update attrs
     padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1)
 
     if len(padding) == 2:
-        padding = padding + [0] * 2
+        padding = [0] * 2 + padding
     elif len(padding) == 1:
-        padding = padding + [0]
+        padding = [0] + padding
     else:
         raise ValueError(
             "The size of padding's dimension should be 1 or 2. But got padding={}".
             format(padding))
 
-    stride = convert_to_list(stride, 1, 'stride') + [1]
-    dilation = convert_to_list(dilation, 1, 'dilation') + [1]
+    stride = [1] + convert_to_list(stride, 1, 'stride')
+    dilation = [1] + convert_to_list(dilation, 1, 'dilation')
+    weight = unsqueeze(weight, axis=[-2])
 
     l_type = "conv2d"
-    if (num_channels == groups and num_channels != 1 and
-            num_filters % num_channels == 0 and not use_cudnn):
+
+    # When "groups==num_channels and num_filters% num_channels == 0" using depthwise_conv2d has better performance
+    if (core.is_compiled_with_cuda() and num_channels == groups and
+            num_channels != 1 and num_filters % num_channels == 0):
         l_type = 'depthwise_conv2d'
         use_cudnn = False
 
@@ -351,9 +354,9 @@ def conv1d(x,
     else:
         l_type = 'conv2d'
 
-    squeeze_aixs = -2 if channel_last else -1
+    squeeze_aixs = -3 if channel_last else -2
     x = unsqueeze(x, axis=[squeeze_aixs])
-    weight = unsqueeze(weight, axis=[-1])
 
     if in_dygraph_mode():
         attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
                  'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
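To close, a quick usage sketch of the NLC path these changes and tests exercise (a minimal example, not from the PR; the weight is random, so only the output shape is meaningful):

```python
import paddle
import paddle.nn.functional as F

# Minimal NLC-format conv1d call of the kind the new tests cover.
x = paddle.randn([4, 50, 16])    # N, L, C
w = paddle.randn([32, 16, 3])    # out_channels, in_channels, kernel_size
y = F.conv1d(x, w, padding=[1, 2], data_format='NLC')
print(y.shape)  # [4, 51, 32]: L' = (50 + 1 + 2 - 2 - 1) // 1 + 1 = 51
```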