Add some passes which can be applied to Program #34730

Merged · 14 commits · Aug 17, 2021
36 changes: 36 additions & 0 deletions paddle/fluid/framework/block_desc.cc
@@ -238,5 +238,41 @@ BlockDesc *BlockDesc::ForwardBlock() const {
return prog_->MutableBlock(static_cast<size_t>(desc_->forward_block_idx()));
}

void BlockDesc::MoveFrom(BlockDesc *block) {
PADDLE_ENFORCE_NOT_NULL(
block, platform::errors::InvalidArgument("Block must be provided."));
if (this == block) {
return;
}

for (auto &pair : block->vars_) {
const auto &name = pair.first;
auto &var_ptr = pair.second;
auto &old_var_ptr = vars_[name];
if (old_var_ptr == nullptr) {
VLOG(10) << "Create new variable " << var_ptr->Name();
old_var_ptr = std::move(var_ptr);
} else {
// NOTE(zjl): we cannot release old_var_ptr, because the Python
// Variable holds a reference to the C++ VarDesc object. If the
// C++ VarDesc object were destructed, any call to the methods
// of the Python Variable might cause a segmentation fault.
VLOG(10) << "Update old variable " << var_ptr->Name();
*old_var_ptr = *var_ptr;
}
}
ops_.clear();
for (const auto &src_op : block->ops_) {
AppendOp()->CopyFrom(*src_op);
}
need_update_ = true;
Flush();

block->ops_.clear();
block->vars_.clear();
block->need_update_ = true;
block->Flush();
}

} // namespace framework
} // namespace paddle
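Note for reviewers: a minimal usage sketch of the new MoveFrom (everything around the call is hypothetical, not part of this diff). The destination block takes over the source block's ops and vars; VarDesc objects the destination has already handed out are assigned in place rather than destructed, so Python-side references stay valid:

#include "paddle/fluid/framework/program_desc.h"

// Hypothetical helper: move the contents of src_prog's block 0 into
// dst_prog's block 0, leaving the source block empty but still valid.
void MoveBlockZero(paddle::framework::ProgramDesc *dst_prog,
                   paddle::framework::ProgramDesc *src_prog) {
  auto *dst = dst_prog->MutableBlock(0);
  auto *src = src_prog->MutableBlock(0);
  dst->MoveFrom(src);  // dst's pre-existing VarDescs are updated, not freed
}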
2 changes: 2 additions & 0 deletions paddle/fluid/framework/block_desc.h
@@ -111,6 +111,8 @@ class BlockDesc {

ProgramDesc *Program() const { return this->prog_; }

void MoveFrom(BlockDesc *block);

private:
ProgramDesc *prog_; // not_own
proto::BlockDesc *desc_; // not_own
5 changes: 5 additions & 0 deletions paddle/fluid/framework/details/build_strategy.h
@@ -180,6 +180,11 @@ struct BuildStrategy {

bool IsFinalized() const { return is_finalized_; }

void ClearFinalized() {
pass_builder_ = nullptr;
is_finalized_ = false;
}
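A hedged sketch of the intended use (the surrounding code is assumed, not part of this diff): once the strategy has been applied and thereby finalized, ClearFinalized() resets it so the pass pipeline can be rebuilt with new settings:

// Hypothetical sketch: reuse a BuildStrategy after it was finalized.
paddle::framework::details::BuildStrategy strategy;
// ... the strategy is applied once, which finalizes it internally ...
if (strategy.IsFinalized()) {
  strategy.ClearFinalized();  // drop pass_builder_ so passes can be rebuilt
}
// ... adjust the strategy's options here, then apply the passes again ...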

bool IsMultiDevPass(const std::string &pass_name) const;

// Apply the passes built by the pass_builder_. The passes will be
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
@@ -50,7 +50,7 @@ if (WITH_TESTING)
endif(WITH_TESTING)
cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS ${GRAPH_PATTERN_DETECTOR_DEPS})

-cc_library(op_compat_sensible_pass SRCS op_compat_sensible_pass.cc DEPS graph_pattern_detector op_def_api)
+cc_library(op_compat_sensible_pass SRCS op_compat_sensible_pass.cc DEPS graph_pattern_detector op_def_api pass)
cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS graph_pattern_detector executor)
cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS op_compat_sensible_pass)
cc_library(placement_pass_base SRCS placement_pass_base.cc DEPS pass)
paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
@@ -10,7 +10,7 @@ cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_h

cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handle reference_count_pass_helper share_tensor_buffer_op_handle graph pass multi_devices_helper)

-cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass)
+cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass executor_gc_helper)
cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_memory_reuse_pass.cc DEPS memory_reuse_pass)

cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass)
paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc
@@ -15,6 +15,7 @@
#include <string>

#include "glog/logging.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/platform/enforce.h"
@@ -30,6 +31,9 @@ class BufferSharedInplaceOpPass : public MemoryReusePass {
std::string ReuseType() const override { return "inplace"; }

void Run(Graph *graph) const override;

void ApplyImpl(ProgramDesc *main_program,
ProgramDesc *startup_program) const override;
};
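Since the pass now overrides the ProgramDesc entry point, a hedged invocation sketch follows; the registry name, attribute keys, and the Apply(ProgramDesc *, ProgramDesc *) wrapper are assumptions based on this PR's direction, not verified API:

// Hypothetical invocation sketch (names and keys assumed):
auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
    "buffer_shared_inplace_pass");
pass->Set("use_cuda", new bool(true));  // key assumed to match kUseCuda; owned by the pass
pass->Set("mem_opt_skip_vars", new std::vector<std::string>());  // skip nothing
pass->Apply(&main_program, &startup_program);  // rewrites block 0 of main_program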

void BufferSharedInplaceOpPass::Run(Graph *graph) const {
@@ -149,6 +153,141 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const {
}
}

static std::string GetFirstVarName(const OpDesc &op, const std::string &slot,
bool is_input) {
const auto &name_map = is_input ? op.Inputs() : op.Outputs();
auto iter = name_map.find(slot);
if (iter != name_map.end() && !iter->second.empty()) {
return iter->second[0];
}
return kEmptyVarName;
}

static std::vector<std::vector<std::pair<std::string, std::string>>>
GetInplaceVars(const BlockDesc &block, bool use_cuda,
const std::vector<std::string> &skip_vars) {
PADDLE_ENFORCE_EQ(block.ID(), 0, platform::errors::Unimplemented(
                      "Inplace can only be performed in block 0."));
// Only the gc_vars of block 0 are used.
const auto all_gc_vars =
GetEagerDeletionCleanVars(*block.Program(), skip_vars)[0];

[Review comment — Member] Readability: suggest renaming 'all_gc_vars' to 'op_gc_vars' or another name; it is then easier to see why its size equals all_ops.size(), and that 'op_gc_vars[i]' means the 'gc_vars' of 'op[i]'.
[Author reply] Done.
const auto all_ops = block.AllOps();
PADDLE_ENFORCE_EQ(all_gc_vars.size(), all_ops.size(),
                  platform::errors::PermissionDenied(
                      "GC analysis error: op number does not match."));
size_t n = all_ops.size();
std::unordered_set<std::string> visited_vars;
std::unordered_set<std::string> reused_in_vars(skip_vars.begin(),
skip_vars.end());
std::unordered_set<std::string> reused_out_vars(skip_vars.begin(),
skip_vars.end());
for (const auto *op : all_ops) {
if (op->Type() == "share_buffer" || op->Type() == "share_data") {
const auto &inputs = op->Input("X");
const auto &outputs = op->Output("Out");
reused_in_vars.insert(inputs.begin(), inputs.end());
reused_out_vars.insert(outputs.begin(), outputs.end());
}
}

std::vector<std::vector<std::pair<std::string, std::string>>> result(n);
for (size_t i = 0; i < n; ++i) {
const auto &op = *all_ops[i];
const auto &gc_vars = all_gc_vars[i];
const auto inputs = op.InputArgumentNames();
const auto outputs = op.OutputArgumentNames();
visited_vars.insert(inputs.begin(), inputs.end());

auto &infer_inplace = OpInfoMap::Instance().Get(op.Type()).infer_inplace_;
if (gc_vars.empty() || !infer_inplace) {
visited_vars.insert(outputs.begin(), outputs.end());
continue;
}

const auto var_pair = infer_inplace(use_cuda);
std::unordered_multiset<std::string> input_set(inputs.begin(),
inputs.end());
std::unordered_multiset<std::string> output_set(outputs.begin(),
outputs.end());
std::unordered_set<std::string> valid_vars;
for (const auto &var : gc_vars) {
if (var != kEmptyVarName && input_set.count(var) == 1 &&
output_set.count(var) == 0 &&
block.FindVar(var)->GetType() == proto::VarType::LOD_TENSOR) {
valid_vars.insert(var);
}
}

if (valid_vars.empty()) {
visited_vars.insert(outputs.begin(), outputs.end());
continue;
}

for (const auto &pair : var_pair) {
const auto &input_slot = pair.first;
const auto &output_slot = pair.second;
auto input_var = GetFirstVarName(op, input_slot, true);
[Review comment — Member] The meaning of the boolean is not straightforward to read without looking at the code of GetFirstVarName. Two suggested options:
1. Write `/* is_input = */ true` at the call site.
2. Change GetFirstVarName to GetNameMapFirstVar and pass it op.Inputs() instead of op.
[Author reply] Done.

if (input_var == kEmptyVarName || valid_vars.count(input_var) == 0) {
continue;
}
auto output_var = GetFirstVarName(op, output_slot, false);
if (output_var == kEmptyVarName || visited_vars.count(output_var) > 0) {
continue;
}
auto output_var_desc = block.FindVar(output_var);
if (output_var_desc == nullptr || output_var_desc->Persistable() ||
output_var_desc->GetType() != proto::VarType::LOD_TENSOR) {
continue;
}

if (reused_in_vars.count(input_var) > 0 ||
reused_out_vars.count(output_var) > 0) {
continue;
}

// input_var -> output_var is reusable
VLOG(10) << "inplace occurs at op " << i << " " << op.Type() << ": "
<< input_var << " -> " << output_var;
result[i].emplace_back(input_var, output_var);
reused_in_vars.insert(input_var);
reused_out_vars.insert(output_var);
}
visited_vars.insert(outputs.begin(), outputs.end());
std::sort(result[i].begin(), result[i].end());
}
return result;
}
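To make the analysis concrete, a hypothetical trace (op and variable names invented purely for illustration) of what GetInplaceVars yields:

// Hypothetical illustration only. Suppose block 0 contains:
//   op[0]: relu(X = {"a"})  -> Out = {"b"}   // "a" becomes GC-able after op[0]
//   op[1]: scale(X = {"b"}) -> Out = {"c"}   // "b" becomes GC-able after op[1]
// If both ops register an infer_inplace rule mapping X -> Out, the result
// holds one (input, output) pair per op:
//   result[0] == {{"a", "b"}}   // "a"'s buffer may be reused for "b"
//   result[1] == {{"b", "c"}}   // "b"'s buffer may be reused for "c"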

void BufferSharedInplaceOpPass::ApplyImpl(ProgramDesc *main_program,
ProgramDesc *startup_program) const {
bool use_cuda = Get<bool>(kUseCuda);
auto skip_vars = Get<std::vector<std::string>>("mem_opt_skip_vars");

auto *block = main_program->MutableBlock(0);
auto inplace_vars = GetInplaceVars(*block, use_cuda, skip_vars);
PADDLE_ENFORCE_EQ(inplace_vars.size(), block->OpSize(),
                  platform::errors::PermissionDenied(
                      "Inplace analysis error: op number does not match."));
int64_t n = static_cast<int64_t>(inplace_vars.size());
for (int64_t i = n - 1; i >= 0; --i) {
if (inplace_vars[i].empty()) continue;
auto *op = block->InsertOp(i);
std::vector<std::string> inputs, outputs;
inputs.reserve(inplace_vars[i].size());
outputs.reserve(inplace_vars[i].size());
for (const auto &pair : inplace_vars[i]) {
inputs.push_back(pair.first);
outputs.push_back(pair.second);
}
op->SetType("share_buffer");
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetOutput("XOut", inputs); // add necessary dependency
op->SetAttr("share_dims", std::vector<bool>(inputs.size(), false));
}
block->Flush();
}
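For illustration (op and var names assumed, not taken from this diff), the inserted ops rewrite a block roughly as follows; re-emitting X through XOut makes every later reader of X depend on the inserted op, which appears to be the "necessary dependency" the code comment mentions:

// Hypothetical before/after of the rewritten block:
//   before: op[i]:  relu(X = {"a"}) -> Out = {"b"}
//   after:  share_buffer(X = {"a"}, Out = {"b"}, XOut = {"a"},
//                        share_dims = {false})    // inserted before op[i]
//           relu(X = {"a"}) -> Out = {"b"}        // "b" now aliases "a"'s buffer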

} // namespace ir
} // namespace framework
} // namespace paddle