[Cherry-pick]Update layout autotune for module with no modified (#46541) (#46515) #46880

Merged 3 commits on Oct 12, 2022
194 changes: 69 additions & 125 deletions paddle/fluid/eager/eager_layout_auto_tune.h
@@ -32,99 +32,74 @@ inline bool NeedTransLayout(
}
return false;
}
inline std::shared_ptr<EagerLayoutTransformer> BaseTransformer(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
bool unstart =
(paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED);
auto first_layout = tensors_vector[0][0].layout();
VLOG(3) << "Layout autotune was is start ? " << (!unstart) << op_name
<< "'s layout is " << first_layout;

transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
}

// For agnostic op like add, relu, exp
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto default_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
// For agnostic op like add, relu, exp
auto first_layout = tensors_vector[0][0].layout();
if (NeedTransLayout(tensors_vector, first_layout)) {
auto desired_layout = DesiredLayout();
bool is_started =
!(desired_layout == paddle::experimental::DataLayout::UNDEFINED);
if (is_started && NeedTransLayout(tensors_vector, first_layout)) {
bool need_trans_back = false;
for (size_t i = 0; i < tensors_vector.size(); i++) {
for (size_t idx = 0; idx < tensors_vector[0].size(); idx++) {
if (4 != tensors_vector[i][idx].shape().size()) {
need_trans_back = true;
VLOG(3) << "Agnostic op " << op_name << " shape is "
<< tensors_vector[i][idx].shape().size() << " and layout is "
<< tensors_vector[i][idx].layout();
}
}
}
auto final_layout = need_trans_back ? default_layout : desired_layout;
auto final_layout = need_trans_back ? DefaultLayout() : desired_layout;
VLOG(4) << op_name << "'s has different layout, need trans to "
<< final_layout;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, final_layout);
}
return BaseTransformer(op_name, tensors_vector);
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
}

// For lightly op like reduce
template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T* attr) {
VLOG(3) << "Lightly op " << op_name << "'s shape is "
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();

std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
// For lightly op like reduce
if (!(DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED)) {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// For lightly op like argmax
template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
T1* axis,
T2* keep_dim) {
VLOG(3) << "Lightly op " << op_name << "'s shape is "
<< tensors_vector[0][0].shape().size() << " and layout is "
<< tensors_vector[0][0].layout();

// For lightly op like argmax
return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
}

// heavily string data_format, data_layout
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::string* attr) {
auto first_layout = tensors_vector[0][0].layout();
// Heavily op with (string) data_format, data_layout
auto transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
op_name, tensors_vector, tensors_vector[0][0].layout());
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
// Layout autotune only supports model with convolutional layers
VLOG(3) << "Optimze Layout was not started " << op_name;
if (op_name != "conv2d") {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return transposer;
} else {
auto data_type = tensors_vector[0][0].dtype();
@@ -134,7 +109,8 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
bool is_tune_fp16 =
(data_type == paddle::experimental::DataType::FLOAT16) &&
(*attr == "NCHW");
VLOG(3) << "Conv2d_dy's dtype " << data_type << " format" << (*attr);
VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
<< op_name;
if (is_tune_fp32) {
paddle::imperative::LayoutAutoTune::Instance().SetDesiredLayout(
paddle::experimental::DataLayout::NCHW);
Expand All @@ -147,58 +123,45 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
paddle::imperative::LayoutAutoTune::Instance().SetDefaultLayout(
paddle::experimental::DataLayout::NCHW);
} else {
VLOG(4) << "DisableLayoutAutoTune accoding to Conv op"
<< " dtype : " << data_type << " format : " << (*attr);
egr::Controller::Instance().DisableLayoutAutoTune();
return transposer;
}
VLOG(3)
<< "Tune the layout from " << *attr << " to "
<< paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
VLOG(4) << "LayoutAutoTune from " << *attr << " to " << DesiredLayout();
}
}

if (paddle::imperative::LayoutAutoTune::Instance().IsHeavilyLayoutSensitive(
op_name)) {
VLOG(3)
<< op_name
<< "'s LayoutTransformer is EagerHeavilyLayoutSensitiveOpTransformer";
auto heavily_transposer =
std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
return heavily_transposer;
return std::make_shared<EagerHeavilyLayoutSensitiveOpTransformer>(op_name,
attr);
}

VLOG(3) << op_name << "'s LayoutTransformer is unimplemented. Use default.";
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// lightly transpose
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
// lightly transpose
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}

if (op_name == "transpose2" &&
(tensors_vector[0][0].layout() ==
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout())) {
(tensors_vector[0][0].layout() == DesiredLayout())) {
auto trans = std::make_shared<EagerTransposeOpTransformer>(op_name);
trans->SetAttr(attr,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return trans;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// lightly int argmax
@@ -210,93 +173,74 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis,
bool* keep_dim) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout() ==
paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();

if (op_name == "argmax" &&
(tensors_vector[0][0].layout() == desired_layout) && (*keep_dim)) {
(tensors_vector[0][0].layout() == DesiredLayout()) && (*keep_dim)) {
std::shared_ptr<EagerArgmaxOpTransformer> argmax_transform = nullptr;
argmax_transform = std::make_shared<EagerArgmaxOpTransformer>(op_name);
argmax_transform->SetAttr(axis,
tensors_vector[0][0].layout() ==
paddle::experimental::DataLayout::NHWC);
return argmax_transform;
}
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// lightly for flatten
template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
int* start_axis,
int* stop_axis) {
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "Optimze Layout was not started" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}
bool no_tranpose = tensors_vector[0][0].layout() == desired_layout;

bool no_tranpose = tensors_vector[0][0].layout() == DesiredLayout();
bool is_valid = ((*start_axis) == 1 && (*stop_axis) == 3);
if (op_name == "flatten" || op_name == "flatten_contiguous_range") {
if (no_tranpose && is_valid) {
std::shared_ptr<EagerFlattenOpTransformer> flatten_transform = nullptr;
flatten_transform = std::make_shared<EagerFlattenOpTransformer>(op_name);
return flatten_transform;
return std::make_shared<EagerFlattenOpTransformer>(op_name);
}
}

transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

// lightly int Concat
template <>
inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis) {
auto desired_layout =
paddle::imperative::LayoutAutoTune::Instance().GetDesiredLayout();
auto first_layout = tensors_vector[0][0].layout();
std::shared_ptr<EagerLayoutTransformer> transposer = nullptr;
if (desired_layout == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(3) << "Optimze Layout was not started" << op_name;
transposer = std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, first_layout);
return transposer;
if (DesiredLayout() == paddle::experimental::DataLayout::UNDEFINED) {
VLOG(4) << "Optimze Layout was not started" << op_name;
return std::make_shared<EagerLayoutTransformer>(
op_name, tensors_vector, tensors_vector[0][0].layout());
}

auto desired_layout = DesiredLayout();
if (NeedTransLayout(tensors_vector, desired_layout)) {
VLOG(3) << op_name << " need transpose to default layout";
transposer =
std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
return transposer;
} else {
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout);
return trans;
VLOG(4) << op_name << "'s has different layout";
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}
if (op_name == "Concat") {
if (desired_layout == tensors_vector[0][0].layout() &&
tensors_vector[0][0].shape().size() == 4) {
auto trans = std::make_shared<EagerConcatOpTransformer>(op_name);
trans->SetAttr(axis, desired_layout);
return trans;
}
}
return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
}

} // namespace egr
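
For reference, the pattern this change repeats in every EagerLayoutAutotune specialization above — return the plain EagerLayoutTransformer while DesiredLayout() is still UNDEFINED (layout autotune has not been started, e.g. no conv2d has been tuned yet), and only build a layout-sensitive transformer once tuning has begun — can be sketched as the minimal, self-contained program below. The DataLayout enum, LayoutTransformer classes, g_desired_layout global, and Autotune() function are hypothetical stand-ins for illustration, not Paddle's real API.

// Minimal sketch (simplified stand-in types, not Paddle's real API) of the
// early-return guard used in each EagerLayoutAutotune specialization: while
// the desired layout is still UNDEFINED, autotune has not started, so the op
// keeps its original layout via the base transformer.
#include <iostream>
#include <memory>
#include <string>

enum class DataLayout { UNDEFINED, NCHW, NHWC };  // stand-in for paddle::experimental::DataLayout

// Hypothetical stand-ins for the transformer hierarchy in the header above.
struct LayoutTransformer {
  virtual ~LayoutTransformer() = default;
  virtual std::string Name() const { return "EagerLayoutTransformer"; }
};
struct LightlySensitiveTransformer : LayoutTransformer {
  std::string Name() const override {
    return "EagerLightlyLayoutSensitiveOpTransformer";
  }
};

// Plays the role of DesiredLayout(); in Paddle this reads
// LayoutAutoTune::Instance().GetDesiredLayout().
DataLayout g_desired_layout = DataLayout::UNDEFINED;
DataLayout DesiredLayout() { return g_desired_layout; }

std::shared_ptr<LayoutTransformer> Autotune(const std::string& op_name) {
  if (DesiredLayout() == DataLayout::UNDEFINED) {
    // Autotune not started yet: no transpose needed, keep the input layout.
    return std::make_shared<LayoutTransformer>();
  }
  // Autotune is running: a lightly layout-sensitive op must transpose its
  // inputs back to the default layout.
  return std::make_shared<LightlySensitiveTransformer>();
}

int main() {
  std::cout << Autotune("reduce_sum")->Name() << "\n";  // base transformer
  g_desired_layout = DataLayout::NHWC;  // pretend a conv2d has started tuning
  std::cout << Autotune("reduce_sum")->Name() << "\n";  // sensitive transformer
  return 0;
}

In the real header the same guard appears per specialization, with the fallback transformer chosen according to the op (reduce, argmax, flatten, transpose2, Concat) and its attributes.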