Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Pten] Gru lstm migration #39729

Merged
merged 7 commits into from
Mar 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -580,8 +580,8 @@ function(hip_library TARGET_NAME)
cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(hip_library_SRCS)
# FindHIP.cmake defines hip_add_library; HIP_SOURCE_PROPERTY_FORMAT is required if no .cu files are found
if(NOT ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators")
set_source_files_properties(${hip_library_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
if(NOT (${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators" OR ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/phi/kernels"))
set_source_files_properties(${hip_library_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
endif()
if (hip_library_SHARED OR hip_library_shared) # build *.so
hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS})
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ limitations under the License. */
#include "paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h"
#include <string>
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -473,7 +473,7 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
hidden_out->mutable_data<T>(place);
cell_out->mutable_data<T>(place);

math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
phi::funcs::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);

Expand Down Expand Up @@ -591,7 +591,7 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
#undef MOVE_ONE_BATCH
#undef DEFINE_CUR

math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
phi::funcs::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batched_h_out->set_lod(batched_lod);
to_seq(dev_ctx, *batched_h_out, hidden_out);
batched_c_out->set_lod(batched_lod);
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/operators/fused/fusion_gru_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ limitations under the License. */
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/fc.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
Expand Down Expand Up @@ -368,7 +368,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
hidden_out->mutable_data<T>(place);
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
phi::funcs::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;

math::FCFunctor<DeviceContext, T> fc;
if (M > D3) {
Expand Down Expand Up @@ -463,7 +463,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
batched_input_data = cur_batched_data;
}

math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
phi::funcs::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batched_out->set_lod(batched_lod);
to_seq(dev_ctx, *batched_out, hidden_out);
}
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/operators/fused/fusion_lstm_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/fc.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
Expand Down Expand Up @@ -421,7 +421,7 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
hidden_out->mutable_data<T>(place);
cell_out->mutable_data<T>(place);

math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
phi::funcs::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
auto blas = phi::funcs::GetBlas<DeviceContext, T>(dev_ctx);
math::FCFunctor<DeviceContext, T> fc;
Expand Down Expand Up @@ -514,7 +514,7 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
batched_input_data = cur_in_data;
}

math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
phi::funcs::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batched_h_out->set_lod(batched_lod);
to_seq(dev_ctx, *batched_h_out, hidden_out);
batched_c_out->set_lod(batched_lod);
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/fused/multi_gru_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/fc.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
Expand Down
28 changes: 14 additions & 14 deletions paddle/fluid/operators/gru_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ limitations under the License. */
#include "paddle/fluid/operators/gru_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/math/detail/gru_cpu_kernel.h"
#include "paddle/fluid/operators/math/detail/gru_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h"
#include "paddle/phi/kernels/funcs/detail/gru_kernel.h"

DECLARE_int32(paddle_num_threads);

Expand Down Expand Up @@ -316,7 +316,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
batch_hidden->mutable_data<T>(context.GetPlace());

bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
phi::funcs::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);

Expand All @@ -326,7 +326,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
}

int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
phi::funcs::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Expand All @@ -347,9 +347,9 @@ class GRUCPUKernel : public framework::OpKernel<T> {
}
auto batch_starts = batch_gate->lod()[0];
size_t seq_len = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
auto active_node = phi::funcs::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
auto active_gate = phi::funcs::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));

#ifdef PADDLE_WITH_MKLML
Expand Down Expand Up @@ -396,9 +396,9 @@ class GRUCPUKernel : public framework::OpKernel<T> {
frame_size * 2, T(1), gru_value.gate_value, frame_size * 3);
}

math::detail::forward_reset_output(
math::detail::forward::gru_resetOutput<T>(), gru_value, frame_size,
cur_batch_size, active_gate);
phi::funcs::detail::forward_reset_output(
phi::funcs::detail::forward::gru_resetOutput<T>(), gru_value,
frame_size, cur_batch_size, active_gate);

if (gru_value.prev_out_value) {
blas.GEMM_COMPUTE(
Expand All @@ -408,9 +408,9 @@ class GRUCPUKernel : public framework::OpKernel<T> {
frame_size * 3);
}

math::detail::forward_final_output(
math::detail::forward::gru_finalOutput<T>(), gru_value, frame_size,
cur_batch_size, active_node, origin_mode);
phi::funcs::detail::forward_final_output(
phi::funcs::detail::forward::gru_finalOutput<T>(), gru_value,
frame_size, cur_batch_size, active_node, origin_mode);

gru_value.prev_out_value = gru_value.output_value;
}
Expand All @@ -432,7 +432,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();

math::GRUUnitFunctor<DeviceContext, T>::compute(
phi::funcs::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate, origin_mode);

Expand All @@ -441,7 +441,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
#ifdef PADDLE_WITH_MKLML
}
#endif
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
phi::funcs::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
Expand Down
12 changes: 6 additions & 6 deletions paddle/fluid/operators/gru_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class GRUKernel : public framework::OpKernel<T> {
batch_hidden->mutable_data<T>(context.GetPlace());

bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
phi::funcs::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);

Expand All @@ -75,7 +75,7 @@ class GRUKernel : public framework::OpKernel<T> {
}

int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
phi::funcs::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Expand All @@ -96,9 +96,9 @@ class GRUKernel : public framework::OpKernel<T> {
}
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
auto active_node = phi::funcs::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
auto active_gate = phi::funcs::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
Expand All @@ -111,13 +111,13 @@ class GRUKernel : public framework::OpKernel<T> {
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
phi::funcs::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate, origin_mode);
gru_value.prev_out_value = gru_value.output_value;
}

math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
phi::funcs::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
Expand Down
22 changes: 11 additions & 11 deletions paddle/fluid/operators/gru_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/gru_compute.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/phi/kernels/funcs/detail/activation_functions.h"
#include "paddle/phi/kernels/funcs/gru_compute.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/sequence2batch.h"

namespace paddle {
namespace operators {
Expand All @@ -32,7 +32,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
const framework::Tensor& src,
framework::Vector<size_t> index_lod,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
dst->mutable_data<T>(src.dims(), ctx.GetPlace());
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
Expand Down Expand Up @@ -63,7 +63,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
auto hidden_dims = hidden->dims();
int frame_size = hidden_dims[1];

math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
phi::funcs::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
LoDTensor batch_hidden_grad, batch_gate_grad, batch_reset_hidden_prev_grad;
batch_hidden_grad.mutable_data<T>(hidden_dims, context.GetPlace());
batch_gate_grad.mutable_data<T>(gate_dims, context.GetPlace());
Expand Down Expand Up @@ -93,12 +93,12 @@ class GRUGradKernel : public framework::OpKernel<T> {
batch_hidden_grad.set_lod(batch_hidden->lod());
to_batch(dev_ctx, *hidden_grad, &batch_hidden_grad, false, is_reverse);

math::GRUMetaValue<T> gru_value;
phi::funcs::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);

math::GRUMetaGrad<T> gru_grad;
phi::funcs::GRUMetaGrad<T> gru_grad;
if (weight_grad) {
gru_grad.gate_weight_grad =
weight_grad->mutable_data<T>(context.GetPlace());
Expand All @@ -112,9 +112,9 @@ class GRUGradKernel : public framework::OpKernel<T> {

auto batch_starts = batch_hidden_grad.lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
auto active_node = phi::funcs::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
auto active_gate = phi::funcs::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
for (int n = static_cast<int>(num_batch) - 1; n >= 0; n--) {
int bstart = static_cast<int>(batch_starts[n]);
Expand Down Expand Up @@ -145,13 +145,13 @@ class GRUGradKernel : public framework::OpKernel<T> {
gru_grad.prev_out_grad = hidden_prev_grad_t.data<T>();
}
gru_value.output_value = nullptr;
math::GRUUnitGradFunctor<DeviceContext, T>::compute(
phi::funcs::GRUUnitGradFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, gru_grad, frame_size, cur_batch_size, active_node,
active_gate, origin_mode);
}
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
phi::funcs::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_gate_grad.set_lod(batch_gate->lod());
to_seq(dev_ctx, batch_gate_grad, input_grad);
}
Expand Down
Loading