From d2aecbe494734ec057ee79c8888bec1058dfc995 Mon Sep 17 00:00:00 2001 From: tudejiang79 <57201278+tudejiang79@users.noreply.github.com> Date: Thu, 1 Jun 2023 00:55:20 +0800 Subject: [PATCH] [Feature] Add the support for rotated_feature_align with MLU (#2809) --- docs/en/understand_mmcv/ops.md | 2 +- docs/zh_cn/understand_mmcv/ops.md | 2 +- .../pytorch/mlu/rotated_feature_align_mlu.cpp | 115 ++++++++++++++++++ tests/test_ops/test_rotated_feature_align.py | 6 +- 4 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md index c401f369b9..34604c05f4 100644 --- a/docs/en/understand_mmcv/ops.md +++ b/docs/en/understand_mmcv/ops.md @@ -41,7 +41,7 @@ We implement common ops used in detection, segmentation, etc. | PointsInBoxes | √ | √ | | | | | PointsInPolygons | | √ | | | | | PSAMask | √ | √ | √ | | √ | -| RotatedFeatureAlign | √ | √ | | | | +| RotatedFeatureAlign | √ | √ | √ | | | | RoIPointPool3d | | √ | √ | | | | RoIPool | | √ | √ | | √ | | RoIAlignRotated | √ | √ | √ | | | diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md index 35082785f0..67ba3e681b 100644 --- a/docs/zh_cn/understand_mmcv/ops.md +++ b/docs/zh_cn/understand_mmcv/ops.md @@ -41,7 +41,7 @@ MMCV 提供了检测、分割等任务中常用的算子 | PointsInBoxes | √ | √ | | | | | PointsInPolygons | | √ | | | | | PSAMask | √ | √ | √ | | √ | -| RotatedFeatureAlign | √ | √ | | | | +| RotatedFeatureAlign | √ | √ | √ | | | | RoIPointPool3d | | √ | √ | | | | RoIPool | | √ | √ | | √ | | RoIAlignRotated | √ | √ | √ | | | diff --git a/mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp b/mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp new file mode 100644 index 0000000000..a827210d2b --- /dev/null +++ b/mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp @@ -0,0 +1,115 @@ +/************************************************************************* + * Copyright (C) 2022 by Cambricon. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *************************************************************************/ +#include "mlu_common_helper.h" + +void RotatedFeatureAlignForwardMLUKernelLauncher(const Tensor features, + const Tensor best_bboxes, + const float spatial_scale, + const int points, + Tensor output) { + auto memory_format = + torch_mlu::cnnl::ops::get_channels_last_memory_format(features.dim()); + auto features_ = + torch_mlu::cnnl::ops::cnnl_contiguous(features, memory_format); + auto best_bboxes_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous( + best_bboxes, best_bboxes.suggest_memory_format()); + auto output_contiguous = + torch_mlu::cnnl::ops::cnnl_contiguous(output, memory_format); + + MluOpTensorDescriptor features_desc, best_bboxes_desc, output_desc; + features_desc.set_with_layout(features_, MLUOP_LAYOUT_NHWC); + best_bboxes_desc.set(best_bboxes_contiguous); + output_desc.set_with_layout(output_contiguous, MLUOP_LAYOUT_NHWC); + + // get ptr of tensors + auto features_impl = torch_mlu::getMluTensorImpl(features_); + auto features_ptr = features_impl->cnnlMalloc(); + auto best_bboxes_impl = torch_mlu::getMluTensorImpl(best_bboxes_contiguous); + auto best_bboxes_ptr = best_bboxes_impl->cnnlMalloc(); + auto output_impl = torch_mlu::getMluTensorImpl(output_contiguous); + auto output_ptr = output_impl->cnnlMalloc(); + + // get compute handle + auto handle = mluOpGetCurrentHandle(); + mluOpRotatedFeatureAlignForward( + handle, features_desc.desc(), features_ptr, best_bboxes_desc.desc(), + best_bboxes_ptr, spatial_scale, points, output_desc.desc(), output_ptr); + + output.copy_(output_contiguous); +} + +void RotatedFeatureAlignBackwardMLUKernelLauncher(const Tensor top_grad, + const Tensor best_bboxes, + const float spatial_scale, + const int points, + Tensor bottom_grad) { + auto memory_format = + torch_mlu::cnnl::ops::get_channels_last_memory_format(top_grad.dim()); + auto top_grad_ = + torch_mlu::cnnl::ops::cnnl_contiguous(top_grad, memory_format); + auto best_bboxes_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous( + best_bboxes, best_bboxes.suggest_memory_format()); + auto bottom_grad_ = + torch_mlu::cnnl::ops::cnnl_contiguous(bottom_grad, memory_format); + + // get ptr of tensors + auto top_grad_impl = torch_mlu::getMluTensorImpl(top_grad_); + auto top_grad_ptr = top_grad_impl->cnnlMalloc(); + auto best_bboxes_impl = torch_mlu::getMluTensorImpl(best_bboxes_contiguous); + auto best_bboxes_ptr = best_bboxes_impl->cnnlMalloc(); + auto bottom_grad_impl = torch_mlu::getMluTensorImpl(bottom_grad_); + auto bottom_grad_ptr = bottom_grad_impl->cnnlMalloc(); + + MluOpTensorDescriptor top_grad_desc, best_bboxes_desc, bottom_grad_desc; + top_grad_desc.set_with_layout(top_grad_, MLUOP_LAYOUT_NHWC); + best_bboxes_desc.set(best_bboxes_contiguous); + bottom_grad_desc.set_with_layout(bottom_grad_, MLUOP_LAYOUT_NHWC); + + // get compute handle + auto handle = mluOpGetCurrentHandle(); + mluOpRotatedFeatureAlignBackward(handle, top_grad_desc.desc(), top_grad_ptr, + best_bboxes_desc.desc(), best_bboxes_ptr, + spatial_scale, points, + bottom_grad_desc.desc(), bottom_grad_ptr); + bottom_grad.copy_(bottom_grad_); +} + +void rotated_feature_align_forward_mlu(const Tensor features, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor output) { + RotatedFeatureAlignForwardMLUKernelLauncher(features, best_bboxes, + spatial_scale, points, output); +} + +void rotated_feature_align_backward_mlu(const Tensor top_grad, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor 
bottom_grad) { + RotatedFeatureAlignBackwardMLUKernelLauncher( + top_grad, best_bboxes, spatial_scale, points, bottom_grad); +} + +void rotated_feature_align_forward_impl(const Tensor features, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor output); + +void rotated_feature_align_backward_impl(const Tensor top_grad, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor bottom_grad); + +REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, MLU, + rotated_feature_align_forward_mlu); +REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, MLU, + rotated_feature_align_backward_mlu); diff --git a/tests/test_ops/test_rotated_feature_align.py b/tests/test_ops/test_rotated_feature_align.py index e7422a3106..005cbcf01c 100644 --- a/tests/test_ops/test_rotated_feature_align.py +++ b/tests/test_ops/test_rotated_feature_align.py @@ -3,7 +3,7 @@ import torch from mmcv.ops import rotated_feature_align -from mmcv.utils import IS_CUDA_AVAILABLE +from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE @pytest.mark.skipif( @@ -13,6 +13,10 @@ 'cuda', marks=pytest.mark.skipif( not IS_CUDA_AVAILABLE, reason='requires CUDA support')), + pytest.param( + 'mlu', + marks=pytest.mark.skipif( + not IS_MLU_AVAILABLE, reason='requires MLU support')), pytest.param( 'cpu', marks=pytest.mark.skipif(
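
Usage note (not part of the patch): with this change, calling rotated_feature_align on tensors that live on an MLU device should dispatch to the new launcher registered via REGISTER_DEVICE_IMPL. The sketch below mirrors the updated test; it assumes an MLU-enabled PyTorch (torch_mlu) and an MMCV build that includes this kernel, and the tensor shapes are illustrative rather than taken from the diff.

# Usage sketch only; shapes and sizes are assumptions for illustration.
import torch

from mmcv.ops import rotated_feature_align
from mmcv.utils import IS_MLU_AVAILABLE

if IS_MLU_AVAILABLE:
    # Feature map laid out as (N, C, H, W); per-location best rotated boxes
    # as (N, H, W, 5) in (cx, cy, w, h, angle) format.
    feature = torch.randn(2, 16, 8, 8).to('mlu').requires_grad_()
    best_rbboxes = torch.rand(2, 8, 8, 5).to('mlu')

    # points may be 1 or 5; spatial_scale maps feature-map coordinates back
    # to image coordinates, as on the existing CUDA/CPU paths.
    out = rotated_feature_align(
        feature, best_rbboxes, spatial_scale=1 / 8, points=1)

    # The backward pass also dispatches to the registered MLU implementation.
    out.sum().backward()

On the MLU path the launcher converts the feature map (or top_grad) to channels-last, describes it to MLU-OPS as NHWC, calls mluOpRotatedFeatureAlignForward/Backward, and then copies the result back into the caller's output (or bottom_grad) tensor, so the Python-level interface is unchanged relative to the CUDA and CPU implementations.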