Skip to content

Commit

Permalink
Add tests for the erase() method
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Sep 2, 2024
1 parent 90fd32d commit 5904f72
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 10 deletions.
3 changes: 3 additions & 0 deletions DataFormats/PortableTestObjects/test/TestSoA.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// A minimal test to ensure that
// - portabletest::TestSoA can be compiled
// - portabletest::TestHostCollection can be allocated
// - portabletest::TestHostCollection can be erased
// - view-based element access works

#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h"
Expand All @@ -14,6 +15,8 @@ int main() {
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
const portabletest::Array flags = {{6, 4, 2, 0}};

collection.erase();

collection.view().r() = 1.;

for (int i = 0; i < size; ++i) {
Expand Down
144 changes: 137 additions & 7 deletions HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
public:
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
// global index of the thread within the grid
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
const portabletest::Array flags = {{6, 4, 2, 0}};

Expand All @@ -41,12 +40,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
ALPAKA_FN_ACC void operator()(TAcc const& acc,
portabletest::TestDeviceMultiCollection2::View<1> view,
double xvalue) const {
// global index of the thread within the grid
const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

// set this only once in the whole kernel grid
if (thread == 0) {
if (once_per_grid(acc)) {
view.r2() = 2.;
}

Expand All @@ -63,12 +60,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
ALPAKA_FN_ACC void operator()(TAcc const& acc,
portabletest::TestDeviceMultiCollection3::View<2> view,
double xvalue) const {
// global index of the thread within the grid
const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};

// set this only once in the whole kernel grid
if (thread == 0) {
if (once_per_grid(acc)) {
view.r3() = 3.;
}

Expand Down Expand Up @@ -342,4 +337,139 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
return collection;
}

// Kernel that verifies a TestDeviceCollection has been zero-initialised:
// the scalar r(), and per-element x/y/z/id/flags/m must all compare equal to zero.
class TestZeroCollectionKernel {
public:
  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
  ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::ConstView view) const {
    // expected values for an erased (zero-filled) collection
    const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
    const portabletest::Array flags = {{0, 0, 0, 0}};

    // check the scalar member only once in the whole kernel grid
    if (once_per_grid(acc)) {
      ALPAKA_ASSERT(view.r() == 0.);
    }

    // make a strided loop over the kernel grid, covering up to "size" elements
    for (int32_t i : uniform_elements(acc, view.metadata().size())) {
      auto element = view[i];
      ALPAKA_ASSERT(element.x() == 0.);
      ALPAKA_ASSERT(element.y() == 0.);
      ALPAKA_ASSERT(element.z() == 0.);
      // id is an integral field (cf. TestZeroStructKernel): compare against an
      // integer literal rather than the double 0. used for the floating-point fields
      ALPAKA_ASSERT(element.id() == 0);
      ALPAKA_ASSERT(element.flags() == flags);
      ALPAKA_ASSERT(element.m() == matrix);
    }
  }
};

// Kernel that verifies the second layout of a TestDeviceMultiCollection2 has been
// zero-initialised: the scalar r2(), and per-element x2/y2/z2/id2/m2 must all be zero.
class TestZeroMultiCollectionKernel2 {
public:
  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
  ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection2::ConstView<1> view) const {
    // expected value for an erased (zero-filled) matrix field
    const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};

    // check the scalar member only once in the whole kernel grid
    if (once_per_grid(acc)) {
      ALPAKA_ASSERT(view.r2() == 0.);
    }

    // make a strided loop over the kernel grid, covering up to "size" elements
    for (int32_t i : uniform_elements(acc, view.metadata().size())) {
      auto element = view[i];
      ALPAKA_ASSERT(element.x2() == 0.);
      ALPAKA_ASSERT(element.y2() == 0.);
      ALPAKA_ASSERT(element.z2() == 0.);
      // id2 is an integral field: compare against an integer literal, not the
      // double 0. used for the floating-point fields
      ALPAKA_ASSERT(element.id2() == 0);
      ALPAKA_ASSERT(element.m2() == matrix);
    }
  }
};

// Kernel that verifies the third layout of a TestDeviceMultiCollection3 has been
// zero-initialised: the scalar r3(), and per-element x3/y3/z3/id3/m3 must all be zero.
class TestZeroMultiCollectionKernel3 {
public:
  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
  ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection3::ConstView<2> view) const {
    // expected value for an erased (zero-filled) matrix field
    const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};

    // check the scalar member only once in the whole kernel grid
    if (once_per_grid(acc)) {
      ALPAKA_ASSERT(view.r3() == 0.);
    }

    // make a strided loop over the kernel grid, covering up to "size" elements
    for (int32_t i : uniform_elements(acc, view.metadata().size())) {
      auto element = view[i];
      ALPAKA_ASSERT(element.x3() == 0.);
      ALPAKA_ASSERT(element.y3() == 0.);
      ALPAKA_ASSERT(element.z3() == 0.);
      // id3 is an integral field: compare against an integer literal, not the
      // double 0. used for the floating-point fields
      ALPAKA_ASSERT(element.id3() == 0);
      ALPAKA_ASSERT(element.m3() == matrix);
    }
  }
};

// Kernel that verifies a TestDeviceObject product has been zero-initialised.
class TestZeroStructKernel {
public:
  template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
  ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceObject::Product const* data) const {
    // a single thread in the whole kernel grid performs the checks
    if (not once_per_grid(acc))
      return;

    auto const& object = *data;
    ALPAKA_ASSERT(object.x == 0.);
    ALPAKA_ASSERT(object.y == 0.);
    ALPAKA_ASSERT(object.z == 0.);
    ALPAKA_ASSERT(object.id == 0);
  }
};

// Verify that every element of the collection is zero-filled.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const {
  // single block, 32 threads of one element each on a GPU backend,
  // or one thread holding 32 elements on a CPU backend
  auto const grid = make_workdiv<Acc1D>(1, 32);

  // the kernel strides over the launch grid to reach all elements in the collection
  alpaka::exec<Acc1D>(queue, grid, TestZeroCollectionKernel{}, collection.const_view());
}

// Verify that every element of both layouts of the multi-collection is zero-filled.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const {
  // single block, 32 threads of one element each on a GPU backend,
  // or one thread holding 32 elements on a CPU backend
  auto const grid = make_workdiv<Acc1D>(1, 32);

  // each kernel strides over the launch grid to reach all elements of its layout
  alpaka::exec<Acc1D>(queue, grid, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
  alpaka::exec<Acc1D>(queue, grid, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
}

// Verify that every element of all three layouts of the multi-collection is zero-filled.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const {
  // single block, 32 threads of one element each on a GPU backend,
  // or one thread holding 32 elements on a CPU backend
  auto const grid = make_workdiv<Acc1D>(1, 32);

  // each kernel strides over the launch grid to reach all elements of its layout
  alpaka::exec<Acc1D>(queue, grid, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
  alpaka::exec<Acc1D>(queue, grid, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
  alpaka::exec<Acc1D>(queue, grid, TestZeroMultiCollectionKernel3{}, collection.const_view<portabletest::TestSoA3>());
}

// Verify that the scalar object product is zero-filled.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const {
  // single block, 32 threads of one element each on a GPU backend,
  // or one thread holding 32 elements on a CPU backend
  auto const grid = make_workdiv<Acc1D>(1, 32);

  // only one thread of the launch grid actually performs the checks
  alpaka::exec<Acc1D>(queue, grid, TestZeroStructKernel{}, object.data());
}

} // namespace ALPAKA_ACCELERATOR_NAMESPACE
5 changes: 5 additions & 0 deletions HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {

void fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue = 0.) const;
void fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue = 0.) const;

void checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const;
void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const;
void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const;
void checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const;
};

} // namespace ALPAKA_ACCELERATOR_NAMESPACE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
void produce(edm::StreamID sid, device::Event& event, device::EventSetup const&) const override {
// run the algorithm, potentially asynchronously
portabletest::TestDeviceCollection deviceCollection{size_, event.queue()};
deviceCollection.erase(event.queue());
algo_.checkZero(event.queue(), deviceCollection);
algo_.fill(event.queue(), deviceCollection);

portabletest::TestDeviceObject deviceObject{event.queue()};
deviceObject.erase(event.queue());
algo_.checkZero(event.queue(), deviceObject);
algo_.fillObject(event.queue(), deviceObject, 5., 12., 13., 42);

portabletest::TestDeviceCollection deviceProduct{size_, event.queue()};
algo_.fill(event.queue(), deviceProduct);

portabletest::TestDeviceMultiCollection2 deviceMultiProduct2{{{size_, size2_}}, event.queue()};
deviceMultiProduct2.erase(event.queue());
algo_.checkZero(event.queue(), deviceMultiProduct2);
algo_.fillMulti2(event.queue(), deviceMultiProduct2);

portabletest::TestDeviceMultiCollection3 deviceMultiProduct3{{{size_, size2_, size3_}}, event.queue()};
deviceMultiProduct3.erase(event.queue());
algo_.checkZero(event.queue(), deviceMultiProduct3);
algo_.fillMulti3(event.queue(), deviceMultiProduct3);

// put the asynchronous products into the event without waiting
Expand Down

0 comments on commit 5904f72

Please sign in to comment.