diff --git a/mmcv/ops/csrc/pytorch/npu/ball_query_npu.cpp b/mmcv/ops/csrc/pytorch/npu/ball_query_npu.cpp
new file mode 100644
index 0000000000..9167875376
--- /dev/null
+++ b/mmcv/ops/csrc/pytorch/npu/ball_query_npu.cpp
@@ -0,0 +1,49 @@
+#include "pytorch_npu_helper.hpp"
+
+using namespace NPU_NAME_SPACE;
+
+// NPU implementation of ball_query_forward: runs the "BallQuery" operator and
+// writes its result into idx.
+//
+// new_xyz is [B, M, 3], xyz is [B, N, 3] and idx is [B, M, nsample]. b, n and
+// m are not read here; the signature mirrors ball_query_forward_impl.
+// min_radius, max_radius and nsample are forwarded as the operator
+// attributes "min_radius", "max_radius" and "sample_num".
+void ball_query_forward_npu(int b, int n, int m, float min_radius,
+                            float max_radius, int nsample, const Tensor new_xyz,
+                            const Tensor xyz, Tensor idx) {
+  // the "sample_num" attribute is passed as int64_t
+  int64_t nsample_i64 = nsample;
+
+  // transpose new_xyz from [B, M, 3] to [M, B, 3]
+  at::Tensor new_xyz_transpose = new_xyz.transpose(0, 1);
+
+  // transpose xyz from [B, N, 3] to [B, 3, N]
+  at::Tensor xyz_transpose = xyz.transpose(1, 2);
+
+  // transpose idx from [B, M, nsample] to [M, B, nsample]
+  at::Tensor idx_transpose = NpuUtils::format_contiguous(idx.transpose(0, 1));
+
+  OpCommand cmd;
+  cmd.Name("BallQuery")
+      .Input(xyz_transpose)
+      .Input(new_xyz_transpose)
+      .Output(idx_transpose)
+      .Attr("min_radius", min_radius)
+      .Attr("max_radius", max_radius)
+      .Attr("sample_num", nsample_i64)
+      .Run();
+
+  // transpose the result back to [B, M, nsample] and copy it into idx
+  idx_transpose = NpuUtils::format_contiguous(idx_transpose.transpose(0, 1));
+  idx.copy_(idx_transpose);
+}
+
+// Declaration of the entry point that the NPU implementation is registered
+// for below.
+void ball_query_forward_impl(int b, int n, int m, float min_radius,
+                             float max_radius, int nsample,
+                             const Tensor new_xyz, const Tensor xyz,
+                             Tensor idx);
+
+REGISTER_NPU_IMPL(ball_query_forward_impl, ball_query_forward_npu);
diff --git a/tests/test_ops/test_ball_query.py b/tests/test_ops/test_ball_query.py
index a3f6518197..25899f2e1f 100644
--- a/tests/test_ops/test_ball_query.py
+++ b/tests/test_ops/test_ball_query.py
@@ -3,7 +3,7 @@
 import torch
 
 from mmcv.ops import ball_query
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 
 
 @pytest.mark.parametrize('device', [
@@ -14,7 +14,11 @@
     pytest.param(
         'mlu',
         marks=pytest.mark.skipif(
-            not IS_MLU_AVAILABLE, reason='requires MLU support'))
+            not IS_MLU_AVAILABLE, reason='requires MLU support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
 ])
 def test_ball_query(device):
     new_xyz = torch.tensor(