[Feature] Add the support for group_points ops with Ascend (#2935)
673958639 committed Sep 20, 2023
1 parent ca99624 commit 8523eee
Showing 2 changed files with 60 additions and 6 deletions.
45 changes: 45 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/group_points_npu.cpp
@@ -0,0 +1,45 @@
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void group_points_forward_npu(int b, int c, int n, int npoints, int nsample,
                              const Tensor points, const Tensor idx,
                              Tensor out) {
  // b, c, n and npoints do not need to be passed to GatherV2;
  // they are derived from the tensor shapes inside the operator.
  // The Ascend GatherV2 operator is used with axis = 0 and batch_dims = 0.
  c10::SmallVector<int64_t, N> axis = {0};
  int64_t batch_dims = 0;

  // Offset idx by batch index * n so that it addresses rows of the
  // flattened (b * n, c) feature table.
  auto index = at::arange(0, b);
  index = index.to(points.device());
  index = index.view({-1, 1, 1});
  index = at::mul(index, n);
  at::Tensor indices = at::add(index, idx);
  indices = indices.view({-1});

  // Flatten the features from (b, c, n) to (b * n, c) for the gather.
  at::Tensor trans_features = points.transpose(1, 2);
  at::Tensor features = NpuUtils::format_contiguous(trans_features);
  features = features.view({b * n, c});

  OpCommand cmd;
  cmd.Name("GatherV2")
      .Input(features)
      .Input(indices)
      .Input(axis)
      .Output(out)
      .Attr("batch_dims", batch_dims)
      .Run();

  // Restore the expected (b, c, npoints, nsample) output layout.
  at::Tensor output =
      out.view({b, npoints, nsample, c}).transpose(1, 3).transpose(2, 3);
  at::Tensor res = NpuUtils::format_contiguous(output);
  out.copy_(res);
}

void group_points_forward_impl(int b, int c, int n, int npoints, int nsample,
                               const Tensor points, const Tensor idx,
                               Tensor out);

REGISTER_NPU_IMPL(group_points_forward_impl, group_points_forward_npu);
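
For reference, the gather-and-reshape performed by the NPU kernel above corresponds to the PyTorch sketch below. It is only an illustration of the technique, not part of the commit; the helper name group_points_reference is hypothetical, and its arguments mirror the kernel's points (b, c, n) and idx (b, npoints, nsample) tensors.

# Illustrative reference only; not part of the commit.
import torch

def group_points_reference(points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
    # points: (b, c, n) float features; idx: (b, npoints, nsample) integer indices.
    b, c, n = points.shape
    npoints, nsample = idx.shape[1], idx.shape[2]
    # Offset each batch by n so idx addresses rows of the flattened feature table.
    offsets = torch.arange(b, device=points.device).view(-1, 1, 1) * n
    indices = (offsets + idx).view(-1).long()
    # Flatten to (b * n, c), gather rows, then restore (b, c, npoints, nsample).
    flat = points.transpose(1, 2).reshape(b * n, c)
    gathered = flat[indices].view(b, npoints, nsample, c)
    return gathered.permute(0, 3, 1, 2).contiguous()
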
21 changes: 15 additions & 6 deletions tests/test_ops/test_group_points.py
@@ -3,16 +3,25 @@
 import torch
 
 from mmcv.ops import grouping_operation
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
 
 
-@pytest.mark.skipif(
-    not torch.cuda.is_available(), reason='requires CUDA support')
+@pytest.mark.parametrize('device', [
+    pytest.param(
+        'cuda',
+        marks=pytest.mark.skipif(
+            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
+])
 @pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
-def test_grouping_points(dtype):
+def test_grouping_points(dtype, device):
     idx = torch.tensor([[[0, 0, 0], [3, 3, 3], [8, 8, 8], [0, 0, 0], [0, 0, 0],
                          [0, 0, 0]],
                         [[0, 0, 0], [6, 6, 6], [9, 9, 9], [0, 0, 0], [0, 0, 0],
-                         [0, 0, 0]]]).int().cuda()
+                         [0, 0, 0]]]).int().to(device)
     features = torch.tensor([[[
         0.5798, -0.7981, -0.9280, -1.3311, 1.3687, 0.9277, -0.4164, -1.8274,
         0.9268, 0.8414
@@ -37,7 +46,7 @@ def test_grouping_points(dtype):
                                  -0.6646, -0.6870, -0.1125, -0.2224, -0.3445,
                                  -1.4049, 0.4990, -0.7037, -0.9924, 0.0386
                              ]]],
-                            dtype=dtype).cuda()
+                            dtype=dtype).to(device)
 
     output = grouping_operation(features, idx)
     expected_output = torch.tensor(
@@ -59,7 +68,7 @@ def test_grouping_points(dtype, device):
          [[-0.6646, -0.6646, -0.6646], [0.4990, 0.4990, 0.4990],
           [0.0386, 0.0386, 0.0386], [-0.6646, -0.6646, -0.6646],
           [-0.6646, -0.6646, -0.6646], [-0.6646, -0.6646, -0.6646]]]],
-        dtype=dtype).cuda()
+        dtype=dtype).to(device)
     assert torch.allclose(output, expected_output)
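
With this change, grouping_operation dispatches to Ascend devices through the same Python API. A minimal usage sketch, assuming torch_npu is installed and an NPU device is visible (the shapes below are illustrative only):

import torch
import torch_npu  # assumed available on an Ascend setup
from mmcv.ops import grouping_operation

features = torch.randn(2, 3, 10).to('npu')  # (B, C, N)
idx = torch.randint(0, 10, (2, 6, 3), dtype=torch.int32).to('npu')  # (B, npoints, nsample)
out = grouping_operation(features, idx)  # -> (2, 3, 6, 3)
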

