-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix bug of reduce_sum op. #46045
Fix bug of reduce_sum op. #46045
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,11 +13,60 @@ | |
// limitations under the License. | ||
|
||
#include "paddle/phi/kernels/reduce_sum_kernel.h" | ||
#include <climits> | ||
#include "paddle/phi/core/enforce.h" | ||
#include "paddle/phi/core/kernel_registry.h" | ||
#include "paddle/phi/kernels/funcs/eigen/common.h" | ||
#include "paddle/phi/kernels/gpu/reduce.h" | ||
|
||
namespace phi { | ||
|
||
template <typename T, | ||
int EigenDimSize = 5, | ||
int ReducedDimSize = 1, | ||
bool ReduceAll = false> | ||
void ReduceSumEigen(const KPDevice& dev_ctx, | ||
const DenseTensor& x, | ||
bool reduce_all, | ||
const std::vector<int64_t>& dims, | ||
DataType out_dtype, | ||
DenseTensor* out, | ||
std::vector<int>* reduce_dims) { | ||
// Resize Input Tensor | ||
auto new_x = x; | ||
int added_dims = EigenDimSize - x.dims().size(); | ||
std::vector<int64_t> new_dim(added_dims, 1); | ||
for (int i = 0; i < x.dims().size(); i++) { | ||
new_dim.push_back(x.dims().at(i)); | ||
} | ||
new_x.Resize(phi::make_ddim(new_dim)); | ||
auto eigen_x_tensor = EigenTensor<T, EigenDimSize>::From(x); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
// Create Out Tensor | ||
dev_ctx.Alloc<T>(out); | ||
// Resize Out Tensor | ||
std::vector<int64_t> new_reduced_dim(added_dims, 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Use |
||
for (int i = 0; i < out->dims().size(); i++) { | ||
new_reduced_dim.push_back(out->dims().at(i)); | ||
} | ||
out->Resize(phi::make_ddim(new_reduced_dim)); | ||
constexpr int kReduceOutRank = ReduceAll ? 1 : EigenDimSize - ReducedDimSize; | ||
auto eigen_out_tensor = EigenTensor<T, kReduceOutRank>::From(*out); | ||
for (int i = 0; i < ReducedDimSize; i++) { | ||
(*reduce_dims)[i] += added_dims; | ||
} | ||
auto eigen_reduce_dim = | ||
EigenDim<ReducedDimSize>::From(phi::make_ddim(*reduce_dims)); | ||
// Calculate | ||
eigen_out_tensor.device(*dev_ctx.eigen_device()) = | ||
eigen_x_tensor.sum(eigen_reduce_dim); | ||
std::vector<int64_t> final_out_dim; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer saving the original output dims first, like: auto origin_out_dims = out->dims();
// some other codes...
out->Resize(origin_out_dims); The code above is more readable. |
||
for (int i = added_dims; i < out->dims().size(); i++) { | ||
final_out_dim.push_back(out->dims().at(i)); | ||
} | ||
out->Resize(phi::make_ddim(final_out_dim)); | ||
} | ||
|
||
template <typename T, typename Context> | ||
void SumRawKernel(const Context& dev_ctx, | ||
const DenseTensor& x, | ||
|
@@ -29,10 +78,53 @@ void SumRawKernel(const Context& dev_ctx, | |
if (out_dtype == DataType::UNDEFINED && out->dtype() != x.dtype()) { | ||
out_dtype = out->dtype(); | ||
} | ||
phi::Reduce<T, kps::AddFunctor, kps::IdentityFunctor>( | ||
dev_ctx, x, reduce_all, dims.GetData(), keep_dim, out_dtype, out); | ||
} | ||
if (x.numel() > INT_MAX) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
#ifndef PADDLE_WITH_XPU_KP | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wrong macro position. It should be: #ifndef PADDLE_WITH_XPU_KP
if (x.numel() > std::numeric_limits<int32_t>::max()) {
// Some other codes..
return;
}
#endif
// The original codes... |
||
std::vector<int> reduce_dims = phi::funcs::details::GetReduceDim( | ||
dims.GetData(), x.dims().size(), reduce_all); | ||
|
||
#define CALL_EIGEN_REDUCE_SUM_KERNEL(reduce_rank) \ | ||
case reduce_rank: { \ | ||
if (reduce_all) { \ | ||
ReduceSumEigen<T, 5, reduce_rank, true>(dev_ctx, \ | ||
x, \ | ||
reduce_all, \ | ||
dims.GetData(), \ | ||
out_dtype, \ | ||
out, \ | ||
&reduce_dims); \ | ||
} else { \ | ||
ReduceSumEigen<T, 5, reduce_rank, false>(dev_ctx, \ | ||
x, \ | ||
reduce_all, \ | ||
dims.GetData(), \ | ||
out_dtype, \ | ||
out, \ | ||
&reduce_dims); \ | ||
} \ | ||
break; \ | ||
} | ||
|
||
switch (reduce_dims.size()) { | ||
CALL_EIGEN_REDUCE_SUM_KERNEL(1); | ||
CALL_EIGEN_REDUCE_SUM_KERNEL(2); | ||
CALL_EIGEN_REDUCE_SUM_KERNEL(3); | ||
CALL_EIGEN_REDUCE_SUM_KERNEL(4); | ||
CALL_EIGEN_REDUCE_SUM_KERNEL(5); | ||
default: | ||
PADDLE_THROW(phi::errors::Fatal( | ||
"If Input.numel() > INT32_MAX, reduce_sum kernel uses EigenTensor " | ||
"sum for reduce_sum function. As a result, its dim should be <= " | ||
"5.")); | ||
break; | ||
} | ||
#undef CALL_EIGEN_REDUCE_SUM_KERNEL | ||
#endif | ||
} else { | ||
phi::Reduce<T, kps::AddFunctor, kps::IdentityFunctor>( | ||
dev_ctx, x, reduce_all, dims.GetData(), keep_dim, out_dtype, out); | ||
} | ||
} | ||
} // namespace phi | ||
|
||
#ifdef PADDLE_WITH_XPU_KP | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change `new_dim` to `new_x_dim`, which is more readable. Change the type of `new_dim` from `std::vector<int64_t>` to `std::array<int64_t, EigenDimSize>`.