[Paddle-TRT] constant-folding (#45494)

Add a constant folding pass; for some models, it reduces latency.
PaddlePaddle · Aug 30, 2022 · 97f43a8 · 97f43a8
1 parent 9dad4f7
commit 97f43a8
Show file tree

Hide file tree

Showing 9 changed files with 244 additions and 7 deletions.
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -147,6 +147,7 @@ pass_library(delete_dropout_op_pass inference)
 pass_library(delete_c_identity_op_pass inference)
 pass_library(preln_residual_bias_fuse_pass inference)
 pass_library(delete_fill_constant_op_pass inference)
+pass_library(constant_folding_pass inference)
 pass_library(simplify_with_basic_ops_pass base)
 pass_library(fc_elementwise_layernorm_fuse_pass base)
 pass_library(skip_layernorm_fuse_pass base)

diff --git a/paddle/fluid/framework/ir/constant_folding_pass.cc b/paddle/fluid/framework/ir/constant_folding_pass.cc
@@ -0,0 +1,159 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/constant_folding_pass.h"
+#include <string>
+#include <vector>
+#include "glog/logging.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/fluid/platform/enforce.h"
+
+#include "paddle/fluid/framework/convert_utils.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+class Node;
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+/*
+ * When an op's inputs and outputs are all determined before any data is fed
+ * to the model, the op can be removed from the model. This ConstantFolding
+ * pass removes all such ops.
+ *
+ */
+
+namespace paddle {
+namespace framework {
+namespace ir {
+namespace patterns {
+
+struct ConstantFolding : public PatternBase {  // declares no pattern nodes
+  ConstantFolding(PDPattern *pattern, const std::string &name_scope)
+      : PatternBase(pattern, name_scope, "constant_folding_pass") {}
+};  // NOTE(review): not referenced elsewhere in this file - confirm it is needed
+}  // namespace patterns
+
+ConstantFoldingPass::ConstantFoldingPass() {}  // nothing to set up here
+
+// Folds every op whose inputs are all persistable: the op is run once on CPU
+// in a scratch scope, its consumed outputs are stored in the parameter scope
+// as persistable variables, and the op is removed from the graph together
+// with the var nodes that only it referenced.
+void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
+  PADDLE_ENFORCE_NOT_NULL(
+      graph, platform::errors::PreconditionNotMet("graph should not be null."));
+  FusePassBase::Init("constant_folding", graph);
+  auto *scope = param_scope();
+
+  PADDLE_ENFORCE_NOT_NULL(
+      scope,
+      platform::errors::Fatal(
+          "scope must not be null when applying constant floding."));
+
+  // Ops that are never folded. fill_constant is deliberately kept for
+  // Paddle-TRT.
+  const std::vector<std::string> blacklist{"fill_constant", "feed"};
+
+  auto op_node_sorted = framework::ir::TopologyVarientSort(
+      *graph, static_cast<framework::ir::SortKind>(0));
+  for (auto *op_node : op_node_sorted) {
+    if (!op_node->IsOp()) continue;
+    if (std::find(blacklist.begin(), blacklist.end(), op_node->Name()) !=
+        blacklist.end())
+      continue;
+
+    // Foldable only if every input has a VarDesc, is persistable and is
+    // present in the parameter scope, and every output has a VarDesc.
+    bool foldable = true;
+    // Occurrence count of each input/output name among the graph's var nodes.
+    std::map<std::string, int> name_count;
+    for (auto *in_node : op_node->inputs) {
+      name_count[in_node->Name()] = 0;
+      if (in_node->Var() == nullptr || !in_node->Var()->Persistable() ||
+          scope->FindVar(in_node->Name()) == nullptr) {
+        foldable = false;
+      }
+    }
+    for (auto *out_node : op_node->outputs) {
+      name_count[out_node->Name()] = 0;
+      if (out_node->Var() == nullptr) foldable = false;
+    }
+    if (!foldable) continue;
+
+    // Refuse to fold when any of these names is shared by another var node.
+    for (auto *node : graph->Nodes()) {
+      if (!node->IsVar()) continue;
+      auto it = name_count.find(node->Name());
+      if (it != name_count.end() && ++it->second > 1) {
+        foldable = false;
+        break;
+      }
+    }
+    if (!foldable) continue;
+
+    // Scratch scope with automatic storage: released even if the op throws.
+    framework::Scope local_scope;
+    std::unordered_set<const paddle::framework::ir::Node *> remove_nodes;
+    for (auto *in_node : op_node->inputs) {
+      // This persistable input node is exclusive, and can be removed
+      if (in_node->outputs.size() == 1L) remove_nodes.emplace(in_node);
+      auto *global_persis_x_tensor =
+          scope->FindVar(in_node->Name())->GetMutable<LoDTensor>();
+      auto *local_x_tensor =
+          local_scope.Var(in_node->Var()->Name())->GetMutable<LoDTensor>();
+      local_x_tensor->Resize(global_persis_x_tensor->dims());
+      *local_x_tensor = *global_persis_x_tensor;
+    }
+
+    std::unique_ptr<OperatorBase> op =
+        paddle::framework::OpRegistry::CreateOp(*op_node->Op());
+    remove_nodes.emplace(op_node);
+    for (auto *out_node : op_node->outputs) {
+      local_scope.Var(out_node->Var()->Name())->GetMutable<LoDTensor>();
+      // An output that nobody consumes is dropped, not made persistable.
+      if (out_node->outputs.size() == 0L) remove_nodes.emplace(out_node);
+    }
+    op->Run(local_scope, platform::CPUPlace());
+    for (auto *out_node : op_node->outputs) {
+      if (out_node->outputs.size() == 0L) continue;  // dropped above
+      auto *out_desc = out_node->Var();
+      const std::string out_name = out_desc->Name();
+      auto *local_out_tensor =
+          local_scope.FindVar(out_name)->GetMutable<LoDTensor>();
+      std::vector<int64_t> out_shape;
+      for (int64_t i = 0; i < local_out_tensor->dims().size(); i++) {
+        out_shape.push_back(local_out_tensor->dims()[i]);
+      }
+      out_desc->SetShape(out_shape);
+      out_desc->SetPersistable(true);
+      auto *global_out_tensor = scope->Var(out_name)->GetMutable<LoDTensor>();
+      *global_out_tensor = *local_out_tensor;
+    }
+    GraphSafeRemoveNodes(graph, remove_nodes);
+  }
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(constant_folding_pass,  // name used in the pass-builder lists
+              paddle::framework::ir::ConstantFoldingPass);
diff --git a/paddle/fluid/framework/ir/constant_folding_pass.h b/paddle/fluid/framework/ir/constant_folding_pass.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+
+namespace paddle {
+
+namespace framework {
+namespace ir {
+
+class Graph;
+
+class ConstantFoldingPass : public FusePassBase {  // constant-folding IR pass
+ public:
+  ConstantFoldingPass();
+  virtual ~ConstantFoldingPass() {}
+
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;  // applies the pass to graph
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -121,8 +121,9 @@ const std::vector<std::string> kTRTSubgraphPasses({
       // "yolo_box_fuse_pass",      //
       "dense_fc_to_sparse_pass",                //
       "dense_multihead_matmul_to_sparse_pass",  //
-      "tensorrt_subgraph_pass",                 //
-      "conv_bn_fuse_pass",                      //
+      "constant_folding_pass",
+      "tensorrt_subgraph_pass",  //
+      "conv_bn_fuse_pass",       //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
 // cudnn8.0 has memory leak problem in conv + eltwise + act, so we
@@ -213,6 +214,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_elementwise_add_fuse_pass",      //
 #endif                                         //
         "transpose_flatten_concat_fuse_pass",  //
+        "constant_folding_pass",
         // following pass should be located in the last, since it will
         // work on all fused ops.
         "runtime_context_cache_pass"
@@ -276,6 +278,7 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
                   "conv_transpose_bn_fuse_pass",             //
                   "conv_transpose_eltwiseadd_bn_fuse_pass",  //
                   "is_test_pass",                            //
+                  "constant_folding_pass",
                   // following pass should be located in the last, since
                   // it will work on all fused ops.
                   "runtime_context_cache_pass"});

diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -169,9 +169,16 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots,
   input_slots->push_back(std::move(response_mask_tensor));
 }
 
+/*
+ * This model marks an output tensor as persistable, which is unexpected, so
+ * constant_folding_pass is disabled for it.
+ */
+
 void SetConfig(AnalysisConfig *cfg) {
   cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
   cfg->SwitchSpecifyInputNames();
+  auto pass_builder = cfg->pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
   cfg->SwitchIrOptim(true);
 }
 

diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc
@@ -17,6 +17,11 @@
 namespace paddle {
 namespace inference {
 
+/*
+ * This model marks an intermediate tensor as persistable, which is
+ * unexpected, so constant_folding_pass is disabled for it.
+ */
+
 using paddle::PaddleTensor;
 
 #ifdef PADDLE_WITH_MKLDNN
@@ -25,6 +30,8 @@ void SetInt8Config(AnalysisConfig *cfg,
   cfg->SetModel(FLAGS_infer_model);
   cfg->EnableMKLDNN();
   cfg->EnableMkldnnQuantizer();
+  auto pass_builder = cfg->pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
   auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(data);
   cfg->mkldnn_quantizer_config()->SetWarmupData(warmup_data);
   cfg->mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_batch_size);

diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
@@ -17,13 +17,19 @@
 namespace paddle {
 namespace inference {
 
+/*
+ * This model marks an intermediate tensor as persistable, which is
+ * unexpected, so constant_folding_pass is disabled for it.
+ */
+
 using paddle::PaddleTensor;
 
 void profile(bool use_mkldnn = false, bool use_gpu = false) {
   AnalysisConfig config;
 
   SetConfig(&config, use_mkldnn, use_gpu);
-
+  auto pass_builder = config.pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
   std::vector<std::vector<PaddleTensor>> outputs;
   std::vector<std::vector<PaddleTensor>> inputs;
   LoadInputData(&inputs);
@@ -48,6 +54,9 @@ TEST(Analyzer_Ernie, fuse_statis) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
 
+  auto pass_builder = cfg.pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
+
   int num_ops;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
   auto fuse_statis = GetFuseStatis(
@@ -70,7 +79,8 @@ void compare(bool use_mkldnn = false) {
 
   AnalysisConfig cfg;
   SetConfig(&cfg, use_mkldnn, false);
-
+  auto pass_builder = cfg.pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
   CompareNativeAndAnalysis(
       reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
 }
@@ -84,7 +94,8 @@ TEST(Analyzer_ernie, compare_mkldnn) { compare(true /* use_mkldnn */); }
 TEST(Analyzer_Ernie, compare_determine) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
-
+  auto pass_builder = cfg.pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   LoadInputData(&input_slots_all);
   CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
@@ -95,7 +106,8 @@ TEST(Analyzer_Ernie, compare_determine) {
 TEST(Analyzer_Ernie, compare_results) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
-
+  auto pass_builder = cfg.pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   LoadInputData(&input_slots_all);
 

diff --git a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
@@ -31,10 +31,19 @@ int GetNumOps(const AnalysisConfig &cfg) {
   return num_ops;
 }
 
+/*
+ * This model marks an output tensor as persistable, which is unexpected, so
+ * constant_folding_pass is disabled for it.
+ */
+
 TEST(Analyzer, save_model) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
   cfg.SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+
+  auto pass_builder = cfg.pass_builder();
+  pass_builder->DeletePass("constant_folding_pass");
+
   //  ensure the path being unique
   std::string optimModelPath = FLAGS_infer_model + "/only_for_save_model_test";
   MKDIR(optimModelPath.c_str());
@@ -49,6 +58,8 @@ TEST(Analyzer, save_model) {
 
   AnalysisConfig cfg3;
   SetConfig(&cfg3);
+  auto pass_builder3 = cfg3.pass_builder();
+  pass_builder3->DeletePass("constant_folding_pass");
   cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
   int fused_num_ops = GetNumOps(cfg3);
   CHECK_LE(fused_num_ops, origin_num_ops);

diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc
@@ -40,7 +40,7 @@ TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) {
   EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0);
   EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
   LOG(INFO) << "num_ops: " << num_ops;
-  EXPECT_EQ(num_ops, 185);
+  EXPECT_EQ(num_ops, 183);
 }
 
 }  // namespace seq_pool1_tester