[Phi]Move swish/hard_swish/pow/round/floor/ceil Kernels to Phi #40913

Merged · 1 commit · Mar 25, 2022
18 changes: 6 additions & 12 deletions paddle/fluid/operators/activation_op.cc
@@ -1499,6 +1499,12 @@ REGISTER_ACTIVATION_OP(logsigmoid, LogSigmoid, LogSigmoidFunctor,
REGISTER_ACTIVATION_OP(log2, Log2, Log2Functor, Log2GradFunctor);
REGISTER_ACTIVATION_OP(log10, Log10, Log10Functor, Log10GradFunctor);
REGISTER_ACTIVATION_OP(log1p, Log1p, Log1pFunctor, Log1pGradFunctor);
REGISTER_ACTIVATION_OP(hard_swish, HardSwish, HardSwishFunctor,
HardSwishGradFunctor);
REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);
REGISTER_ACTIVATION_OP(round, Round, RoundFunctor, ZeroGradFunctor);
REGISTER_ACTIVATION_OP(floor, Floor, FloorFunctor, ZeroGradFunctor);
REGISTER_ACTIVATION_OP(ceil, Ceil, CeilFunctor, ZeroGradFunctor);
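
Note: these five activations now register through the shared REGISTER_ACTIVATION_OP macro, with their actual kernels supplied by Phi. For reference, a minimal standalone sketch of what each forward pass computes, mirroring the Eigen functors removed from activation_op.h below (the attribute defaults of threshold=6, scale=6, offset=3 and beta=1 are assumptions taken from the comments there, not from this hunk):

```cpp
#include <algorithm>
#include <cmath>

// Reference-only sketches of the forward computations being migrated.
float hard_swish(float x, float threshold = 6.0f, float scale = 6.0f,
                 float offset = 3.0f) {
  // min(max(x + offset, 0), threshold) * x / scale
  return std::min(std::max(x + offset, 0.0f), threshold) * x / scale;
}

float swish(float x, float beta = 1.0f) {
  // x * sigmoid(beta * x)  ==  x / (1 + exp(-beta * x))
  return x / (1.0f + std::exp(-beta * x));
}

// round/floor/ceil are plain elementwise rounding ops; their registered
// gradient is identically zero (see ZeroGradFunctor below).
float round_fwd(float x) { return std::round(x); }
float floor_fwd(float x) { return std::floor(x); }
float ceil_fwd(float x) { return std::ceil(x); }
```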

/* ========================== sigmoid register =============================
*/
@@ -1778,18 +1784,6 @@ REGISTER_OPERATOR(
ops::ActFwdInplaceInferer, void>::type);
REGISTER_OPERATOR(pow_grad, ops::PowOpGrad,
ops::ActivationGradOpInplaceInferer);

REGISTER_OP_CPU_KERNEL(
pow, ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<float>>,
ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<double>>,
ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<int>>,
ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<int64_t>>);
REGISTER_OP_CPU_KERNEL(
pow_grad,
ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<float>>,
ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<double>>,
ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<int>>,
ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<int64_t>>);
/* ========================================================================== */

/* ========================== exp register ============================ */
273 changes: 16 additions & 257 deletions paddle/fluid/operators/activation_op.h
Expand Up @@ -286,10 +286,25 @@ USE_PHI_DOUBLE_GRAD_FUNCTOR(Log)
USE_PHI_FUNCTOR(Log2)
USE_PHI_FUNCTOR(Log10)
USE_PHI_FUNCTOR(Log1p)
USE_PHI_FUNCTOR(Swish)
USE_PHI_FUNCTOR(HardSwish)
USE_PHI_FUNCTOR(Pow)

template <typename T>
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;

template <typename T>
using RoundFunctor = phi::funcs::RoundFunctor<T>;

template <typename T>
using FloorFunctor = phi::funcs::FloorFunctor<T>;

template <typename T>
using CeilFunctor = phi::funcs::CeilFunctor<T>;

template <typename T>
using ZeroGradFunctor = phi::funcs::ZeroGradFunctor<T>;
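
Note: the USE_PHI_FUNCTOR lines and the explicit using-aliases above keep the old fluid functor names valid while pointing them at the phi::funcs implementations, so the remaining fluid call sites compile unchanged. A self-contained illustration of that forwarding pattern (the namespace and functor names below are hypothetical stand-ins, not Paddle code):

```cpp
// Hypothetical stand-ins: "newlib" plays the role of phi::funcs, and
// SquareFunctor plays the role of a migrated activation functor.
namespace newlib {
template <typename T>
struct SquareFunctor {
  T operator()(T x) const { return x * x; }
};
}  // namespace newlib

// Old-style alias, as in the diff above: code that names SquareFunctor<T>
// keeps compiling but now uses the migrated implementation.
template <typename T>
using SquareFunctor = newlib::SquareFunctor<T>;

int main() { return SquareFunctor<int>{}(3) == 9 ? 0 : 1; }
```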

// exp(x) = e^x
template <typename T>
struct ExpFunctor : public BaseActivationFunctor<T> {
@@ -391,46 +406,6 @@ struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
}
};

// ceil(x) = ceiling(x)
template <typename T>
struct CeilFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.ceil();
}
};

template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0) * out;
}

static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kNoDeps;
}
};

// floor(x) = flooring(x)
template <typename T>
struct FloorFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.floor();
}
};

// round(x) = [x]
template <typename T>
struct RoundFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.round();
}
};
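
Note: round, floor and ceil are piecewise constant, so their registered backward is the removed ZeroGradFunctor, which writes dx = 0 * out and reports kNoDeps (the backward pass needs neither X nor Out). A trivial standalone restatement of that convention:

```cpp
#include <cstdio>

// Sketch of the semantics ZeroGradFunctor encodes for round/floor/ceil:
// the incoming gradient is ignored and zero is routed back.
float zero_grad(float dout) {
  (void)dout;   // incoming gradient is irrelevant
  return 0.0f;  // dx = 0 * out in the removed functor
}

int main() {
  std::printf("d(round)/dx at any x: %f\n", zero_grad(1.0f));
  return 0;
}
```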

// reciprocal(x) = 1 / x
template <typename T>
struct ReciprocalFunctor : public BaseActivationFunctor<T> {
@@ -509,51 +484,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
}
};

// HardSwish = min(max(0, x+3), 6) * x / 6
template <typename T>
struct HardSwishFunctor : public BaseActivationFunctor<T> {
float threshold;
float scale;
float offset;

typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
}

template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = (x + static_cast<T>(offset))
.cwiseMax(static_cast<T>(0))
.cwiseMin(static_cast<T>(threshold)) *
x / static_cast<T>(scale);
}
};

template <typename T>
struct HardSwishGradFunctor : public BaseActivationFunctor<T> {
float threshold;
float scale;
float offset;

typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
auto tmp = ((x + static_cast<T>(offset)) < static_cast<T>(threshold))
.template cast<T>();
dx.device(d) =
dout *
(((x + static_cast<T>(offset)) > static_cast<T>(0)).template cast<T>() *
(static_cast<T>(2) * x + static_cast<T>(offset)) /
static_cast<T>(scale) * tmp +
static_cast<T>(1) * (static_cast<T>(1) - tmp));
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
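
Note: the removed HardSwishGradFunctor is the piecewise derivative of hard_swish(x) = min(max(x + offset, 0), threshold) * x / scale. A reference-only restatement of that gradient in plain C++ (the 6/6/3 defaults are again assumed from the comment above):

```cpp
// Closed-form gradient implemented by the removed HardSwishGradFunctor.
float hard_swish_grad(float x, float dout, float threshold = 6.0f,
                      float scale = 6.0f, float offset = 3.0f) {
  float t = x + offset;
  if (t <= 0.0f) return 0.0f;                 // flat region on the left
  if (t >= threshold) return dout;            // identity region on the right
  return dout * (2.0f * x + offset) / scale;  // quadratic middle section
}
```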

// For numerical stability, using the following formula instead of softplus(x) =
// log(1 + exp(x))
// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta =
@@ -776,35 +706,6 @@ struct CELUGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5198
template <typename T>
struct PowFunctor : public BaseActivationFunctor<T> {
float factor;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"factor", &factor}};
}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.pow(static_cast<T>(factor));
}
};

template <typename T>
struct PowGradFunctor : public BaseActivationFunctor<T> {
float factor;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"factor", &factor}};
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * static_cast<T>(factor) *
x.pow(static_cast<T>(factor) - static_cast<T>(1));
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
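
Note: the removed Pow functors implement out = x^factor and dx = dout * factor * x^(factor - 1). A framework-free restatement for reference (this is not the Phi kernel itself):

```cpp
#include <cmath>

float pow_forward(float x, float factor) { return std::pow(x, factor); }

float pow_backward(float x, float dout, float factor) {
  // d/dx x^factor = factor * x^(factor - 1)
  return dout * factor * std::pow(x, factor - 1.0f);
}
```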

template <typename T>
struct LogitFunctor {
template <typename Device, typename X, typename Out, typename P>
@@ -870,39 +771,6 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct SwishFunctor : public BaseActivationFunctor<T> {
float beta;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}};
}

template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x / (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
}
};

template <typename T>
struct SwishGradFunctor : public BaseActivationFunctor<T> {
float beta;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}};
}

template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out fake_out, dOut dout, dX dx) const {
auto temp1 = static_cast<T>(1) /
(static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
auto out = x * temp1;
auto temp2 = temp1 * (static_cast<T>(1) - (static_cast<T>(beta) * out));
dx.device(d) = dout * ((static_cast<T>(beta) * out) + temp2);
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
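
Note: the removed SwishGradFunctor recomputes the output from x instead of using the saved Out (hence kDepX). A reference-only restatement of the same gradient:

```cpp
#include <cmath>

// With s = sigmoid(beta * x) and swish(x) = x * s, the removed functor
// computes dx = dout * (beta * swish(x) + s * (1 - beta * swish(x))).
float swish_grad(float x, float dout, float beta = 1.0f) {
  float s = 1.0f / (1.0f + std::exp(-beta * x));
  float out = x * s;
  return dout * (beta * out + s * (1.0f - beta * out));
}
```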

template <typename T>
struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
@@ -1267,110 +1135,6 @@ class RsqrtDoubleGradKernel
}
};

template <typename DeviceContext, typename Functor>
class PowKernel : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;

void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* X = nullptr;
framework::Tensor* Out = nullptr;
ExtractActivationTensor(context, &X, &Out);
Out->mutable_data<T>(context.GetPlace());

auto x = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "Pow"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Output", "Out", "Pow"));
auto* place =
context.template device_context<DeviceContext>().eigen_device();
Functor functor;
auto attrs = functor.GetAttrs();
for (auto& attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
// get FactorTensor
auto* factor_tensor = context.HasInput("FactorTensor")
? context.Input<framework::Tensor>("FactorTensor")
: nullptr;
if (factor_tensor) {
auto* factor_data = factor_tensor->data<float>();
framework::Tensor cpu_factor_tensor;
if (platform::is_gpu_place(factor_tensor->place())) {
framework::TensorCopySync(*factor_tensor, platform::CPUPlace(),
&cpu_factor_tensor);
factor_data = cpu_factor_tensor.data<float>();
}
auto factor =
std::vector<float>(factor_data, factor_data + factor_tensor->numel());
PADDLE_ENFORCE_EQ(
factor.size(), 1,
platform::errors::InvalidArgument(
"The shape of factor(tensor) must be [1] rather than %d",
factor.size()));
for (auto& attr : attrs) {
*attr.second = factor[0];
}
}
functor(*place, x, out);
}
};

template <typename DeviceContext, typename Functor>
class PowGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor *X, *Out, *dOut;
framework::Tensor* dX = nullptr;
X = Out = dOut = nullptr;
ExtractActivationGradTensor<Functor::FwdDeps()>(context, &X, &Out, &dOut,
&dX);
dX->mutable_data<T>(context.GetPlace());
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Input", "Out@GRAD", "PowGrad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Input", "Out", "PowGrad"));
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "X@GRAD", "PowGrad"));
auto x = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "PowGrad"));
auto* place =
context.template device_context<DeviceContext>().eigen_device();
Functor functor;
auto attrs = functor.GetAttrs();
for (auto& attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
// get FactorTensor
auto* factor_tensor =
context.HasInput("FactorTensor")
? context.Input<framework::LoDTensor>("FactorTensor")
: nullptr;
if (factor_tensor) {
auto* factor_data = factor_tensor->data<float>();
framework::Tensor cpu_factor_tensor;
if (platform::is_gpu_place(factor_tensor->place())) {
framework::TensorCopySync(*factor_tensor, platform::CPUPlace(),
&cpu_factor_tensor);
factor_data = cpu_factor_tensor.data<float>();
}
auto factor =
std::vector<float>(factor_data, factor_data + factor_tensor->numel());
PADDLE_ENFORCE_EQ(
factor.size(), 1,
platform::errors::InvalidArgument(
"The shape of factor(tensor) must be [1] rather than %d",
factor.size()));
for (auto& attr : attrs) {
*attr.second = factor[0];
}
}
functor(*place, x, out, dout, dx);
}
};
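
Note: beyond the elementwise math, the removed Pow kernels also resolve an optional FactorTensor input that overrides the factor attribute at run time (copied to the host when it lives on the GPU, and required to hold exactly one element); the equivalent handling now has to live on the Phi side. A framework-free sketch of just that resolution step (the std::vector stand-in for the tensor is hypothetical, not Paddle API):

```cpp
#include <stdexcept>
#include <vector>

// Resolve the exponent: an optional single-element "FactorTensor" overrides
// the "factor" attribute. The removed kernels additionally copy the tensor to
// the CPU first when it resides on the GPU.
float resolve_factor(float attr_factor,
                     const std::vector<float>* factor_tensor) {
  if (factor_tensor == nullptr) return attr_factor;
  if (factor_tensor->size() != 1) {
    throw std::invalid_argument("The shape of factor(tensor) must be [1]");
  }
  return (*factor_tensor)[0];
}
```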

template <typename DeviceContext, typename T>
class LogitKernel : public framework::OpKernel<T> {
public:
@@ -1418,15 +1182,10 @@ class LogitGradKernel : public framework::OpKernel<T> {
} // namespace paddle

#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor); \
__macro(floor, Floor, FloorFunctor, ZeroGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(swish, Swish, SwishFunctor, SwishGradFunctor); \
__macro(mish, Mish, MishFunctor, MishGradFunctor); \
__macro(hard_swish, HardSwish, HardSwishFunctor, HardSwishGradFunctor);
__macro(mish, Mish, MishFunctor, MishGradFunctor);