Skip to content

Commit

Permalink
[Paddle-TRT] constant-folding (#45494)
Browse files Browse the repository at this point in the history
Add a constant-folding pass; for some models it reduces latency.
  • Loading branch information
zhoutianzi666 committed Aug 30, 2022
1 parent 9dad4f7 commit 97f43a8
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 7 deletions.
1 change: 1 addition & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ pass_library(delete_dropout_op_pass inference)
pass_library(delete_c_identity_op_pass inference)
pass_library(preln_residual_bias_fuse_pass inference)
pass_library(delete_fill_constant_op_pass inference)
pass_library(constant_folding_pass inference)
pass_library(simplify_with_basic_ops_pass base)
pass_library(fc_elementwise_layernorm_fuse_pass base)
pass_library(skip_layernorm_fuse_pass base)
Expand Down
159 changes: 159 additions & 0 deletions paddle/fluid/framework/ir/constant_folding_pass.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/constant_folding_pass.h"

#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

#include "glog/logging.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace framework {
namespace ir {
class Node;
} // namespace ir
} // namespace framework
} // namespace paddle

/*
 * When an op's inputs and outputs are already determined before any data is
 * fed to the model, the op can be removed from the model. This ConstantFolding
 * pass removes all such ops.
 *
 */

namespace paddle {
namespace framework {
namespace ir {
namespace patterns {

// Pattern type for the constant folding pass. It adds no members of its own:
// the constructor only forwards the pattern and the name scope to PatternBase
// together with the fixed string "constant_folding_pass".
struct ConstantFolding : public PatternBase {
  ConstantFolding(PDPattern *pd_pattern, const std::string &scope_name)
      : PatternBase(pd_pattern, scope_name, "constant_folding_pass") {}
};
} // namespace patterns

// The pass needs no construction-time setup; all work happens in ApplyImpl.
ConstantFoldingPass::ConstantFoldingPass() = default;

/*
 * Walks the operators of `graph` in the order returned by
 * TopologyVarientSort and folds every operator that satisfies all of:
 *   - it is not in the blacklist ("fill_constant", "feed");
 *   - every input variable has a VarDesc, is persistable and can be found in
 *     the parameter scope;
 *   - every output variable has a VarDesc;
 *   - no other variable node in the graph shares a name with one of its
 *     inputs or outputs.
 *
 * Folding an operator means: run it once on CPUPlace inside a temporary
 * scope, store each still-consumed output tensor in the parameter scope,
 * mark that output persistable with its computed shape, and remove the
 * operator node (plus inputs used only by it and outputs nobody consumes)
 * from the graph.
 *
 * Raises (through PADDLE_ENFORCE_NOT_NULL) when `graph` or the parameter
 * scope is null. Anything thrown while creating or running an operator
 * propagates to the caller; the temporary scope is released in that case too.
 */
void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
  PADDLE_ENFORCE_NOT_NULL(
      graph, platform::errors::PreconditionNotMet("graph should not be null."));
  FusePassBase::Init("constant_folding", graph);
  auto *scope = param_scope();

  PADDLE_ENFORCE_NOT_NULL(
      scope,
      platform::errors::Fatal(
          "scope must not be null when applying constant floding."));

  // Now, I don't want to fold fill_constant op in Paddle-TRT
  const std::vector<std::string> blacklist{"fill_constant", "feed"};

  // NOTE(review): 0 is presumably the plain topological-sort kind of
  // SortKind -- confirm against graph_helper.h.
  auto op_node_sorted = framework::ir::TopologyVarientSort(
      *graph, static_cast<framework::ir::SortKind>(0));
  for (auto *op_node : op_node_sorted) {
    if (!op_node->IsOp()) continue;
    if (std::find(blacklist.begin(), blacklist.end(), op_node->Name()) !=
        blacklist.end())
      continue;

    // Names of all input/output variables of this op, mapped to the number
    // of variable nodes in the graph that carry that name.
    std::map<std::string, int> name_count;
    bool foldable = true;

    // Every input must be described, persistable and actually present in the
    // parameter scope; otherwise its value is not known ahead of time.
    for (auto *in_node : op_node->inputs) {
      auto *in_desc = in_node->Var();
      if (in_desc == nullptr || !in_desc->Persistable() ||
          scope->FindVar(in_node->Name()) == nullptr) {
        foldable = false;
        break;
      }
      name_count[in_node->Name()] = 0;
    }
    if (!foldable) continue;

    // Outputs are rewritten through their VarDesc below, so it must exist.
    for (auto *out_node : op_node->outputs) {
      if (out_node->Var() == nullptr) {
        foldable = false;
        break;
      }
      name_count[out_node->Name()] = 0;
    }
    if (!foldable) continue;

    // Forbid other nodes in the graph having the same name as one of this
    // op's variables. A single walk over the graph is enough: bail out as
    // soon as any tracked name is seen a second time.
    for (auto *node : graph->Nodes()) {
      if (!node->IsVar()) continue;
      auto found = name_count.find(node->Name());
      if (found == name_count.end()) continue;
      if (++found->second > 1) {
        foldable = false;
        break;
      }
    }
    if (!foldable) continue;

    std::unordered_set<const paddle::framework::ir::Node *> remove_nodes;
    // `op` is declared before `local_scope` so that the scope is destroyed
    // first, as in the previous manual delete.
    std::unique_ptr<OperatorBase> op;
    // Automatic storage: released on every exit path, including exceptions
    // thrown by CreateOp or Run.
    framework::Scope local_scope;

    for (auto *in_node : op_node->inputs) {
      // This persistable input node is exclusive, and can be removed
      if (in_node->outputs.size() == 1L) remove_nodes.emplace(in_node);

      auto *global_persis_x_tensor =
          scope->FindVar(in_node->Name())->GetMutable<LoDTensor>();
      auto *local_x_tensor =
          local_scope.Var(in_node->Var()->Name())->GetMutable<LoDTensor>();
      local_x_tensor->Resize(global_persis_x_tensor->dims());
      *local_x_tensor = *global_persis_x_tensor;
    }

    op = paddle::framework::OpRegistry::CreateOp(*op_node->Op());
    remove_nodes.emplace(op_node);
    for (auto *out_node : op_node->outputs) {
      local_scope.Var(out_node->Var()->Name())->GetMutable<LoDTensor>();
      // useless out_node can be removed, not need set it persistable !
      if (out_node->outputs.size() == 0L) remove_nodes.emplace(out_node);
    }
    op->Run(local_scope, platform::CPUPlace());

    for (auto *out_node : op_node->outputs) {
      // this out_node is useless, do not set it persistable
      if (out_node->outputs.size() == 0L) continue;
      auto *out_desc = out_node->Var();
      const auto out_name = out_desc->Name();
      auto *local_out_tensor =
          local_scope.FindVar(out_name)->GetMutable<LoDTensor>();
      std::vector<int64_t> out_shape;
      for (int64_t i = 0; i < local_out_tensor->dims().size(); i++) {
        out_shape.push_back(local_out_tensor->dims()[i]);
      }
      out_desc->SetShape(out_shape);
      out_desc->SetPersistable(true);
      auto *global_out_tensor = scope->Var(out_name)->GetMutable<LoDTensor>();
      *global_out_tensor = *local_out_tensor;
    }
    GraphSafeRemoveNodes(graph, remove_nodes);
  }
}

} // namespace ir
} // namespace framework
} // namespace paddle

// Makes ConstantFoldingPass available under the name "constant_folding_pass",
// the name used in the pass lists of paddle_pass_builder.cc.
REGISTER_PASS(constant_folding_pass,
paddle::framework::ir::ConstantFoldingPass);
37 changes: 37 additions & 0 deletions paddle/fluid/framework/ir/constant_folding_pass.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/ir/fuse_pass_base.h"

namespace paddle {

namespace framework {
namespace ir {

class Graph;

/*
 * Graph pass that evaluates, ahead of time, operators whose inputs are all
 * persistable and replaces them with their precomputed outputs.
 * The work is done in ApplyImpl.
 */
class ConstantFoldingPass : public FusePassBase {
 public:
  ConstantFoldingPass();
  virtual ~ConstantFoldingPass() = default;

 protected:
  // Applies constant folding to `graph` in place.
  void ApplyImpl(ir::Graph* graph) const override;
};

} // namespace ir
} // namespace framework
} // namespace paddle
7 changes: 5 additions & 2 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,9 @@ const std::vector<std::string> kTRTSubgraphPasses({
// "yolo_box_fuse_pass", //
"dense_fc_to_sparse_pass", //
"dense_multihead_matmul_to_sparse_pass", //
"tensorrt_subgraph_pass", //
"conv_bn_fuse_pass", //
"constant_folding_pass",
"tensorrt_subgraph_pass", //
"conv_bn_fuse_pass", //
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
Expand Down Expand Up @@ -213,6 +214,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
"constant_folding_pass",
// following pass should be located in the last, since it will
// work on all fused ops.
"runtime_context_cache_pass"
Expand Down Expand Up @@ -276,6 +278,7 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
"conv_transpose_bn_fuse_pass", //
"conv_transpose_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"constant_folding_pass",
// following pass should be located in the last, since
// it will work on all fused ops.
"runtime_context_cache_pass"});
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,16 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots,
input_slots->push_back(std::move(response_mask_tensor));
}

/*
 * This model is unusual: it marks an output tensor as persistable, so
 * constant_folding_pass is disabled for it.
 */

// Configures `cfg` for this test: loads the model and parameter files from
// FLAGS_infer_model, switches on input-name specification, removes
// constant_folding_pass from the pass list and enables IR optimization.
void SetConfig(AnalysisConfig *cfg) {
  const std::string prog_file = FLAGS_infer_model + "/__model__";
  const std::string params_file = FLAGS_infer_model + "/param";
  cfg->SetModel(prog_file, params_file);
  cfg->SwitchSpecifyInputNames();
  cfg->pass_builder()->DeletePass("constant_folding_pass");
  cfg->SwitchIrOptim(true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
namespace paddle {
namespace inference {

/*
 * This model is unusual: it marks an intermediate tensor as persistable, so
 * constant_folding_pass is disabled for it.
 */

using paddle::PaddleTensor;

#ifdef PADDLE_WITH_MKLDNN
Expand All @@ -25,6 +30,8 @@ void SetInt8Config(AnalysisConfig *cfg,
cfg->SetModel(FLAGS_infer_model);
cfg->EnableMKLDNN();
cfg->EnableMkldnnQuantizer();
auto pass_builder = cfg->pass_builder();
pass_builder->DeletePass("constant_folding_pass");
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(data);
cfg->mkldnn_quantizer_config()->SetWarmupData(warmup_data);
cfg->mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_batch_size);
Expand Down
20 changes: 16 additions & 4 deletions paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@
namespace paddle {
namespace inference {

/*
 * This model is unusual: it marks an intermediate tensor as persistable, so
 * constant_folding_pass is disabled for it.
 */

using paddle::PaddleTensor;

void profile(bool use_mkldnn = false, bool use_gpu = false) {
AnalysisConfig config;

SetConfig(&config, use_mkldnn, use_gpu);

auto pass_builder = config.pass_builder();
pass_builder->DeletePass("constant_folding_pass");
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> inputs;
LoadInputData(&inputs);
Expand All @@ -48,6 +54,9 @@ TEST(Analyzer_Ernie, fuse_statis) {
AnalysisConfig cfg;
SetConfig(&cfg);

auto pass_builder = cfg.pass_builder();
pass_builder->DeletePass("constant_folding_pass");

int num_ops;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_statis = GetFuseStatis(
Expand All @@ -70,7 +79,8 @@ void compare(bool use_mkldnn = false) {

AnalysisConfig cfg;
SetConfig(&cfg, use_mkldnn, false);

auto pass_builder = cfg.pass_builder();
pass_builder->DeletePass("constant_folding_pass");
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
}
Expand All @@ -84,7 +94,8 @@ TEST(Analyzer_ernie, compare_mkldnn) { compare(true /* use_mkldnn */); }
TEST(Analyzer_Ernie, compare_determine) {
AnalysisConfig cfg;
SetConfig(&cfg);

auto pass_builder = cfg.pass_builder();
pass_builder->DeletePass("constant_folding_pass");
std::vector<std::vector<PaddleTensor>> input_slots_all;
LoadInputData(&input_slots_all);
CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
Expand All @@ -95,7 +106,8 @@ TEST(Analyzer_Ernie, compare_determine) {
TEST(Analyzer_Ernie, compare_results) {
AnalysisConfig cfg;
SetConfig(&cfg);

auto pass_builder = cfg.pass_builder();
pass_builder->DeletePass("constant_folding_pass");
std::vector<std::vector<PaddleTensor>> input_slots_all;
LoadInputData(&input_slots_all);

Expand Down
11 changes: 11 additions & 0 deletions paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,19 @@ int GetNumOps(const AnalysisConfig &cfg) {
return num_ops;
}

/*
 * This model is unusual: it marks an output tensor as persistable, so
 * constant_folding_pass is disabled for it.
 */

TEST(Analyzer, save_model) {
AnalysisConfig cfg;
SetConfig(&cfg);
cfg.SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");

auto pass_builder = cfg.pass_builder();
pass_builder->DeletePass("constant_folding_pass");

// ensure the path being unique
std::string optimModelPath = FLAGS_infer_model + "/only_for_save_model_test";
MKDIR(optimModelPath.c_str());
Expand All @@ -49,6 +58,8 @@ TEST(Analyzer, save_model) {

AnalysisConfig cfg3;
SetConfig(&cfg3);
auto pass_builder3 = cfg3.pass_builder();
pass_builder3->DeletePass("constant_folding_pass");
cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
int fused_num_ops = GetNumOps(cfg3);
CHECK_LE(fused_num_ops, origin_num_ops);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) {
EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0);
EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
LOG(INFO) << "num_ops: " << num_ops;
EXPECT_EQ(num_ops, 185);
EXPECT_EQ(num_ops, 183);
}

} // namespace seq_pool1_tester
Expand Down

0 comments on commit 97f43a8

Please sign in to comment.