From d2aecbe494734ec057ee79c8888bec1058dfc995 Mon Sep 17 00:00:00 2001 From: tudejiang79 <57201278+tudejiang79@users.noreply.github.com> Date: Thu, 1 Jun 2023 00:55:20 +0800 Subject: [PATCH] [Feature] Add the support for rotated_feature_align with MLU (#2809) --- docs/en/understand_mmcv/ops.md | 2 +- docs/zh_cn/understand_mmcv/ops.md | 2 +- .../pytorch/mlu/rotated_feature_align_mlu.cpp | 115 ++++++++++++++++++ tests/test_ops/test_rotated_feature_align.py | 6 +- 4 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md index c401f369b9..34604c05f4 100644 --- a/docs/en/understand_mmcv/ops.md +++ b/docs/en/understand_mmcv/ops.md @@ -41,7 +41,7 @@ We implement common ops used in detection, segmentation, etc. | PointsInBoxes | √ | √ | | | | | PointsInPolygons | | √ | | | | | PSAMask | √ | √ | √ | | √ | -| RotatedFeatureAlign | √ | √ | | | | +| RotatedFeatureAlign | √ | √ | √ | | | | RoIPointPool3d | | √ | √ | | | | RoIPool | | √ | √ | | √ | | RoIAlignRotated | √ | √ | √ | | | diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md index 35082785f0..67ba3e681b 100644 --- a/docs/zh_cn/understand_mmcv/ops.md +++ b/docs/zh_cn/understand_mmcv/ops.md @@ -41,7 +41,7 @@ MMCV 提供了检测、分割等任务中常用的算子 | PointsInBoxes | √ | √ | | | | | PointsInPolygons | | √ | | | | | PSAMask | √ | √ | √ | | √ | -| RotatedFeatureAlign | √ | √ | | | | +| RotatedFeatureAlign | √ | √ | √ | | | | RoIPointPool3d | | √ | √ | | | | RoIPool | | √ | √ | | √ | | RoIAlignRotated | √ | √ | √ | | | diff --git a/mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp b/mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp new file mode 100644 index 0000000000..a827210d2b --- /dev/null +++ b/mmcv/ops/csrc/pytorch/mlu/rotated_feature_align_mlu.cpp @@ -0,0 +1,115 @@ +/************************************************************************* + * Copyright (C) 2022 by Cambricon. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *************************************************************************/ +#include "mlu_common_helper.h" + +void RotatedFeatureAlignForwardMLUKernelLauncher(const Tensor features, + const Tensor best_bboxes, + const float spatial_scale, + const int points, + Tensor output) { + auto memory_format = + torch_mlu::cnnl::ops::get_channels_last_memory_format(features.dim()); + auto features_ = + torch_mlu::cnnl::ops::cnnl_contiguous(features, memory_format); + auto best_bboxes_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous( + best_bboxes, best_bboxes.suggest_memory_format()); + auto output_contiguous = + torch_mlu::cnnl::ops::cnnl_contiguous(output, memory_format); + + MluOpTensorDescriptor features_desc, best_bboxes_desc, output_desc; + features_desc.set_with_layout(features_, MLUOP_LAYOUT_NHWC); + best_bboxes_desc.set(best_bboxes_contiguous); + output_desc.set_with_layout(output_contiguous, MLUOP_LAYOUT_NHWC); + + // get ptr of tensors + auto features_impl = torch_mlu::getMluTensorImpl(features_); + auto features_ptr = features_impl->cnnlMalloc(); + auto best_bboxes_impl = torch_mlu::getMluTensorImpl(best_bboxes_contiguous); + auto best_bboxes_ptr = best_bboxes_impl->cnnlMalloc(); + auto output_impl = torch_mlu::getMluTensorImpl(output_contiguous); + auto output_ptr = output_impl->cnnlMalloc(); + + // get compute handle + auto handle = mluOpGetCurrentHandle(); + mluOpRotatedFeatureAlignForward( + handle, features_desc.desc(), features_ptr, best_bboxes_desc.desc(), + best_bboxes_ptr, spatial_scale, points, output_desc.desc(), output_ptr); + + output.copy_(output_contiguous); +} + +void RotatedFeatureAlignBackwardMLUKernelLauncher(const Tensor top_grad, + const Tensor best_bboxes, + const float spatial_scale, + const int points, + Tensor bottom_grad) { + auto memory_format = + torch_mlu::cnnl::ops::get_channels_last_memory_format(top_grad.dim()); + auto top_grad_ = + torch_mlu::cnnl::ops::cnnl_contiguous(top_grad, memory_format); + auto best_bboxes_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous( + best_bboxes, best_bboxes.suggest_memory_format()); + auto bottom_grad_ = + torch_mlu::cnnl::ops::cnnl_contiguous(bottom_grad, memory_format); + + // get ptr of tensors + auto top_grad_impl = torch_mlu::getMluTensorImpl(top_grad_); + auto top_grad_ptr = top_grad_impl->cnnlMalloc(); + auto best_bboxes_impl = torch_mlu::getMluTensorImpl(best_bboxes_contiguous); + auto best_bboxes_ptr = best_bboxes_impl->cnnlMalloc(); + auto bottom_grad_impl = torch_mlu::getMluTensorImpl(bottom_grad_); + auto bottom_grad_ptr = bottom_grad_impl->cnnlMalloc(); + + MluOpTensorDescriptor top_grad_desc, best_bboxes_desc, bottom_grad_desc; + top_grad_desc.set_with_layout(top_grad_, MLUOP_LAYOUT_NHWC); + best_bboxes_desc.set(best_bboxes_contiguous); + bottom_grad_desc.set_with_layout(bottom_grad_, MLUOP_LAYOUT_NHWC); + + // get compute handle + auto handle = mluOpGetCurrentHandle(); + mluOpRotatedFeatureAlignBackward(handle, top_grad_desc.desc(), top_grad_ptr, + best_bboxes_desc.desc(), best_bboxes_ptr, + spatial_scale, points, + bottom_grad_desc.desc(), bottom_grad_ptr); + bottom_grad.copy_(bottom_grad_); +} + +void rotated_feature_align_forward_mlu(const Tensor features, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor output) { + RotatedFeatureAlignForwardMLUKernelLauncher(features, best_bboxes, + spatial_scale, points, output); +} + +void rotated_feature_align_backward_mlu(const Tensor top_grad, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor 
bottom_grad) { + RotatedFeatureAlignBackwardMLUKernelLauncher( + top_grad, best_bboxes, spatial_scale, points, bottom_grad); +} + +void rotated_feature_align_forward_impl(const Tensor features, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor output); + +void rotated_feature_align_backward_impl(const Tensor top_grad, + const Tensor best_bboxes, + const float spatial_scale, + const int points, Tensor bottom_grad); + +REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, MLU, + rotated_feature_align_forward_mlu); +REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, MLU, + rotated_feature_align_backward_mlu); diff --git a/tests/test_ops/test_rotated_feature_align.py b/tests/test_ops/test_rotated_feature_align.py index e7422a3106..005cbcf01c 100644 --- a/tests/test_ops/test_rotated_feature_align.py +++ b/tests/test_ops/test_rotated_feature_align.py @@ -3,7 +3,7 @@ import torch from mmcv.ops import rotated_feature_align -from mmcv.utils import IS_CUDA_AVAILABLE +from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE @pytest.mark.skipif( @@ -13,6 +13,10 @@ 'cuda', marks=pytest.mark.skipif( not IS_CUDA_AVAILABLE, reason='requires CUDA support')), + pytest.param( + 'mlu', + marks=pytest.mark.skipif( + not IS_MLU_AVAILABLE, reason='requires MLU support')), pytest.param( 'cpu', marks=pytest.mark.skipif(
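
Usage note (not part of the patch): with this change, calling rotated_feature_align on tensors that live on an MLU device should dispatch to the new launcher registered via REGISTER_DEVICE_IMPL. The sketch below mirrors the updated test; it assumes an MLU-enabled PyTorch (torch_mlu) and an MMCV build that includes this kernel, and the tensor shapes are illustrative rather than taken from the diff.

# Usage sketch only; shapes and sizes are assumptions for illustration.
import torch

from mmcv.ops import rotated_feature_align
from mmcv.utils import IS_MLU_AVAILABLE

if IS_MLU_AVAILABLE:
    # Feature map laid out as (N, C, H, W); per-location best rotated boxes
    # as (N, H, W, 5) in (cx, cy, w, h, angle) format.
    feature = torch.randn(2, 16, 8, 8).to('mlu').requires_grad_()
    best_rbboxes = torch.rand(2, 8, 8, 5).to('mlu')

    # points may be 1 or 5; spatial_scale maps feature-map coordinates back
    # to image coordinates, as on the existing CUDA/CPU paths.
    out = rotated_feature_align(
        feature, best_rbboxes, spatial_scale=1 / 8, points=1)

    # The backward pass also dispatches to the registered MLU implementation.
    out.sum().backward()

On the MLU path the launcher converts the feature map (or top_grad) to channels-last, describes it to MLU-OPS as NHWC, calls mluOpRotatedFeatureAlignForward/Backward, and then copies the result back into the caller's output (or bottom_grad) tensor, so the Python-level interface is unchanged relative to the CUDA and CPU implementations.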