Fix bug of reduce_sum op. #46045

Merged: 4 commits, Sep 17, 2022. Changes from 2 commits.
98 changes: 95 additions & 3 deletions paddle/phi/kernels/kps/reduce_sum_kernel.cu
@@ -13,11 +13,60 @@
// limitations under the License.

#include "paddle/phi/kernels/reduce_sum_kernel.h"
#include <climits>
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/gpu/reduce.h"

namespace phi {

template <typename T,
          int EigenDimSize = 5,
          int ReducedDimSize = 1,
          bool ReduceAll = false>
void ReduceSumEigen(const KPDevice& dev_ctx,
                    const DenseTensor& x,
                    bool reduce_all,
                    const std::vector<int64_t>& dims,
                    DataType out_dtype,
                    DenseTensor* out,
                    std::vector<int>* reduce_dims) {
  // Resize Input Tensor
  auto new_x = x;
  int added_dims = EigenDimSize - x.dims().size();
  std::vector<int64_t> new_dim(added_dims, 1);
Review comment (Collaborator):

Change new_dim to new_x_dim, which is more readable.

Change the type of new_dim from std::vector<int64_t> to std::array<int64_t, EigenDimSize>.
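A minimal sketch of what that suggestion could look like (not the PR's code; the name new_x_dim follows the comment, and a DDim(const int64_t*, int) constructor is assumed, requiring <array>):

// Sketch only: pad the shape with a fixed-capacity array instead of a
// heap-allocated vector; the padded rank EigenDimSize is a compile-time
// constant, so no allocation is needed.
std::array<int64_t, EigenDimSize> new_x_dim;
const int added_dims = EigenDimSize - x.dims().size();
for (int i = 0; i < added_dims; ++i) {
  new_x_dim[i] = 1;  // leading padding dims of size 1
}
for (int i = 0; i < x.dims().size(); ++i) {
  new_x_dim[added_dims + i] = x.dims().at(i);  // original dims
}
// Assumed constructor; fall back to copying into a std::vector for
// phi::make_ddim if DDim lacks it.
new_x.Resize(phi::DDim(new_x_dim.data(), EigenDimSize));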

  for (int i = 0; i < x.dims().size(); i++) {
    new_dim.push_back(x.dims().at(i));
  }
  new_x.Resize(phi::make_ddim(new_dim));
  auto eigen_x_tensor = EigenTensor<T, EigenDimSize>::From(x);
Review comment (Collaborator):

x -> new_x: the Eigen view should be built from the reshaped new_x, whose rank now matches EigenDimSize, not from the original x.
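Spelled out, the one-line fix this comment asks for (a sketch of what a later commit would apply):

// Build the Eigen view from the reshaped tensor so the ranks match:
auto eigen_x_tensor = EigenTensor<T, EigenDimSize>::From(new_x);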


  // Create Out Tensor
  dev_ctx.Alloc<T>(out);
  // Resize Out Tensor
  std::vector<int64_t> new_reduced_dim(added_dims, 1);
Review comment (Collaborator):

new_reduced_dim -> new_out_dim.

Use std::array<int64_t, kReduceOutRank> instead of std::vector<int64_t>.
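Note that taking this suggestion also requires hoisting the constexpr int kReduceOutRank declaration above this point; in the diff as written it is introduced only after new_reduced_dim is built.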

  for (int i = 0; i < out->dims().size(); i++) {
    new_reduced_dim.push_back(out->dims().at(i));
  }
  out->Resize(phi::make_ddim(new_reduced_dim));
  constexpr int kReduceOutRank = ReduceAll ? 1 : EigenDimSize - ReducedDimSize;
  auto eigen_out_tensor = EigenTensor<T, kReduceOutRank>::From(*out);
  for (int i = 0; i < ReducedDimSize; i++) {
    (*reduce_dims)[i] += added_dims;
  }
  auto eigen_reduce_dim =
      EigenDim<ReducedDimSize>::From(phi::make_ddim(*reduce_dims));
  // Calculate the reduced sum with Eigen.
  eigen_out_tensor.device(*dev_ctx.eigen_device()) =
      eigen_x_tensor.sum(eigen_reduce_dim);
  std::vector<int64_t> final_out_dim;
Review comment (Collaborator):

I prefer saving the original output dims first, like:

auto origin_out_dims = out->dims();
// some other codes...
out->Resize(origin_out_dims);

The code above is more readable.
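Expanded slightly, the reviewer's preference would make the tail of ReduceSumEigen look roughly like this (sketch; origin_out_dims is the name from the comment):

auto origin_out_dims = out->dims();  // caller-visible shape, saved up front
// ... pad out, run the Eigen reduction as above ...
out->Resize(origin_out_dims);  // restore it; no final_out_dim loop needed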

  for (int i = added_dims; i < out->dims().size(); i++) {
    final_out_dim.push_back(out->dims().at(i));
  }
  out->Resize(phi::make_ddim(final_out_dim));
}

template <typename T, typename Context>
void SumRawKernel(const Context& dev_ctx,
                  const DenseTensor& x,
@@ -29,10 +78,53 @@ void SumRawKernel(const Context& dev_ctx,
  if (out_dtype == DataType::UNDEFINED && out->dtype() != x.dtype()) {
    out_dtype = out->dtype();
  }
-  phi::Reduce<T, kps::AddFunctor, kps::IdentityFunctor>(
-      dev_ctx, x, reduce_all, dims.GetData(), keep_dim, out_dtype, out);
-}
  if (x.numel() > INT_MAX) {
Review comment (Collaborator):

x.numel() >= std::numeric_limits<int32_t>::max()
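As a self-contained illustration of that suggestion (hypothetical helper, not part of the PR):

#include <cstdint>
#include <limits>

// Compare the element count against std::numeric_limits instead of the C
// macro INT_MAX; >= also routes numel() == INT32_MAX to the 64-bit-safe path.
inline bool ExceedsInt32IndexRange(int64_t numel) {
  return numel >= std::numeric_limits<int32_t>::max();
}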

#ifndef PADDLE_WITH_XPU_KP
Review comment (Collaborator):

It seems that out_dtype is not considered here. How about raising an exception directly if a valid out_dtype is provided?
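One way to act on this comment (a hypothetical guard, not in the PR; phi::errors::Unimplemented is assumed to be available in this file):

// Reject an explicit cast request up front, since the Eigen fallback
// below ignores out_dtype.
if (out_dtype != DataType::UNDEFINED && out_dtype != x.dtype()) {
  PADDLE_THROW(phi::errors::Unimplemented(
      "reduce_sum does not support a custom out_dtype when "
      "Input.numel() exceeds INT32_MAX."));
}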

Review comment (Collaborator):

Wrong macro position. It should be:

#ifndef PADDLE_WITH_XPU_KP
if (x.numel() > std::numeric_limits<int32_t>::max()) {
  // Some other codes...
  return;
}
#endif
// The original codes...

    std::vector<int> reduce_dims = phi::funcs::details::GetReduceDim(
        dims.GetData(), x.dims().size(), reduce_all);

#define CALL_EIGEN_REDUCE_SUM_KERNEL(reduce_rank)             \
  case reduce_rank: {                                         \
    if (reduce_all) {                                         \
      ReduceSumEigen<T, 5, reduce_rank, true>(dev_ctx,        \
                                              x,              \
                                              reduce_all,     \
                                              dims.GetData(), \
                                              out_dtype,      \
                                              out,            \
                                              &reduce_dims);  \
    } else {                                                  \
      ReduceSumEigen<T, 5, reduce_rank, false>(dev_ctx,       \
                                               x,             \
                                               reduce_all,    \
                                               dims.GetData(),\
                                               out_dtype,     \
                                               out,           \
                                               &reduce_dims); \
    }                                                         \
    break;                                                    \
  }

    switch (reduce_dims.size()) {
      CALL_EIGEN_REDUCE_SUM_KERNEL(1);
      CALL_EIGEN_REDUCE_SUM_KERNEL(2);
      CALL_EIGEN_REDUCE_SUM_KERNEL(3);
      CALL_EIGEN_REDUCE_SUM_KERNEL(4);
      CALL_EIGEN_REDUCE_SUM_KERNEL(5);
      default:
        PADDLE_THROW(phi::errors::Fatal(
            "If Input.numel() > INT32_MAX, reduce_sum kernel uses EigenTensor "
            "sum for reduce_sum function. As a result, its dim should be <= "
            "5."));
        break;
    }
#undef CALL_EIGEN_REDUCE_SUM_KERNEL
#endif
  } else {
    phi::Reduce<T, kps::AddFunctor, kps::IdentityFunctor>(
        dev_ctx, x, reduce_all, dims.GetData(), keep_dim, out_dtype, out);
  }
}
} // namespace phi

#ifdef PADDLE_WITH_XPU_KP
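The CALL_EIGEN_REDUCE_SUM_KERNEL macro plus switch above is a standard idiom for turning a runtime reduce rank into a compile-time template argument. A stripped-down, self-contained sketch of the same pattern (all names hypothetical):

#include <cstdio>
#include <stdexcept>

// Each case instantiates the template for one rank, so the body can use
// ReduceRank as a compile-time constant (as the Eigen kernel above does).
template <int ReduceRank>
void DoReduce() {
  std::printf("instantiated for rank %d\n", ReduceRank);
}

void DispatchByRank(int reduce_rank) {
  switch (reduce_rank) {
    case 1: DoReduce<1>(); break;
    case 2: DoReduce<2>(); break;
    case 3: DoReduce<3>(); break;
    case 4: DoReduce<4>(); break;
    case 5: DoReduce<5>(); break;
    default:
      throw std::runtime_error("reduce rank must be between 1 and 5");
  }
}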