From 4743cc8b9a8d77ea47e08a42a16246b538bda56f Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Fri, 2 Dec 2022 16:05:24 +0800
Subject: [PATCH 1/2] [Release2.4] Revert python link prs (#48573)

* Revert "Fix mac link python (#48017)"

This reverts commit 3fa7a736e32508e797616b6344d97814c37d3ff8.

* Revert "[Cherry-pick] Fix python link error (#47811)"

This reverts commit ff642c68d6681596844b5c1bae695a81d1baf3da.

* Update config.go
---
 paddle/fluid/inference/goapi/config.go |  4 ++--
 paddle/fluid/pybind/CMakeLists.txt     | 13 -------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/inference/goapi/config.go b/paddle/fluid/inference/goapi/config.go
index 0aca2a1075fd3..d156252985eb2 100644
--- a/paddle/fluid/inference/goapi/config.go
+++ b/paddle/fluid/inference/goapi/config.go
@@ -332,9 +332,9 @@ func (config *Config) IrOptim() bool {
 /// \param useCalibMode Use TRT int8 calibration(post training
 /// quantization).
 ///
-func (config *Config) EnableTensorRtEngine(workspaceSize int32, maxBatchSize int32, minSubgraphSize int32,
+func (config *Config) EnableTensorRtEngine(workspaceSize int64, maxBatchSize int32, minSubgraphSize int32,
 	precision Precision, useStatic bool, useCalibMode bool) {
-	C.PD_ConfigEnableTensorRtEngine(config.c, C.int32_t(workspaceSize), C.int32_t(maxBatchSize), C.int32_t(minSubgraphSize), C.int32_t(precision), cvtGoBoolToPD(useStatic), cvtGoBoolToPD(useCalibMode))
+	C.PD_ConfigEnableTensorRtEngine(config.c, C.int64_t(workspaceSize), C.int32_t(maxBatchSize), C.int32_t(minSubgraphSize), C.int32_t(precision), cvtGoBoolToPD(useStatic), cvtGoBoolToPD(useCalibMode))
 }
 
 ///
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index a77659ba99d47..0a59caae2bbe8 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -262,10 +262,6 @@ if(WITH_PYTHON)
     list(APPEND OP_FUNCTION_GENERETOR_DEPS cncl_context)
   endif()
 
-  if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
-    list(APPEND OP_FUNCTION_GENERETOR_DEPS ${PYTHON_LIBRARIES})
-  endif()
-
   add_executable(op_function_generator op_function_generator.cc)
   target_link_libraries(op_function_generator ${OP_FUNCTION_GENERETOR_DEPS})
   add_executable(eager_legacy_op_function_generator
@@ -605,13 +601,4 @@ if(WITH_PYTHON)
     get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
     target_link_libraries(${SHARD_LIB_NAME} ${os_dependency_modules})
     add_dependencies(${SHARD_LIB_NAME} op_function_generator_cmd)
-
-    if(APPLE)
-      string(REGEX REPLACE ".+/(.+)" "\\1" PYTHON_LIBRARY_NAME
-                           ${PYTHON_LIBRARIES})
-      # target_link_libraries(${SHARD_LIB_NAME} "-Wl,-rpath,${PYTHON_LIBRARY_NAME}")
-    else()
-      target_link_libraries(${SHARD_LIB_NAME} ${PYTHON_LIBRARIES})
-    endif()
-
   endif()

From 0741d1bb5c72a5ae0697167ab51114e88ec4ead1 Mon Sep 17 00:00:00 2001
From: yuanlehome
Date: Wed, 21 Dec 2022 11:38:39 +0000
Subject: [PATCH 2/2] fix mixed precision inference

---
 .../framework/ir/auto_mixed_precision_pass.cc | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
index bc034301989b0..44b41a89700de 100644
--- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -437,6 +437,20 @@ void AutoMixedPrecisionPass::UpdateOpPrecision() const {
             vars_should_not_low_precision.insert(in_var_node->Var()->Name());
           }
         }
+
+        // When op_1 only supports a CPU kernel, and op_2's input var is
+        // op_1's output var, op_2 should not run at half precision.
+        if (GetOpOriginalType(op_type) != "feed" &&
+            !GpuKernelSupportPrecision(GetOpOriginalType(op_type),
+                                       phi::DataType::FLOAT32)) {
+          for (auto* out_var_node : op_node->outputs) {
+            CHECK_EQ(out_var_node->IsVar(), true);
+            if (out_var_node->Var()->Persistable()) continue;
+            if (!VarNodeHasDtype(out_var_node)) continue;
+
+            vars_should_not_low_precision.insert(out_var_node->Var()->Name());
+          }
+        }
       }
     }
   };
@@ -449,6 +463,25 @@ void AutoMixedPrecisionPass::UpdateOpPrecision() const {
       for (auto* op_node : nodes) {
         if (op_run_low_precision_.count(op_node->Op()->Type()) == 0) continue;
 
+        for (auto* in_var_node : op_node->inputs) {
+          CHECK_EQ(in_var_node->IsVar(), true);
+          if (!VarNodeHasDtype(in_var_node)) continue;
+
+          auto* real_in_var_node = real_vars_[in_var_node->Var()->Name()];
+          if (real_in_var_node->Var()->Persistable()) continue;
+
+          if (vars_should_not_low_precision.count(
+                  real_in_var_node->Var()->Name())) {
+            op_run_low_precision_.erase(op_node->Op()->Type());
+            precision_updated = true;
+            VLOG(4) << op_node->Op()->Type()
+                    << " should not run at low precision.";
+            break;
+          }
+        }
+
+        if (op_run_low_precision_.count(op_node->Op()->Type()) == 0) continue;
+
         for (auto* out_var_node : op_node->outputs) {
           CHECK_EQ(out_var_node->IsVar(), true);
           if (!VarNodeHasDtype(out_var_node)) continue;
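
Note: the rule the second patch adds can be restated as a small standalone
sketch. The Op struct, the has_gpu_fp32_kernel flag, and the container names
below are simplified stand-ins for the pass's graph/IR structures, not
Paddle's actual API; the logic only mirrors the two hunks above: outputs of
an op with no usable GPU FP32 kernel must stay FP32, and any op that reads
such a var is dropped from the low-precision set until a fixed point is
reached.

// Standalone sketch of the precision-propagation rule; all names here are
// illustrative stand-ins, not Paddle's real IR types.
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Op {
  std::string type;
  std::vector<std::string> inputs;   // input var names
  std::vector<std::string> outputs;  // output var names
  bool has_gpu_fp32_kernel;          // stand-in for GpuKernelSupportPrecision
};

int main() {
  // op_1 has no GPU kernel, so its output must stay FP32; op_2 consumes it.
  std::vector<Op> ops = {
      {"op_1", {"x"}, {"y"}, /*has_gpu_fp32_kernel=*/false},
      {"op_2", {"y"}, {"z"}, /*has_gpu_fp32_kernel=*/true},
  };

  std::set<std::string> run_low_precision = {"op_2"};
  std::set<std::string> vars_should_not_low_precision;

  // Mirrors the first hunk: mark outputs of CPU-only ops as "keep FP32".
  for (const auto& op : ops) {
    if (!op.has_gpu_fp32_kernel) {
      for (const auto& out : op.outputs) {
        vars_should_not_low_precision.insert(out);
      }
    }
  }

  // Mirrors the second hunk: iterate to a fixed point, removing from the
  // low-precision set any op whose input var was marked "keep FP32".
  bool updated = true;
  while (updated) {
    updated = false;
    for (const auto& op : ops) {
      if (run_low_precision.count(op.type) == 0) continue;
      for (const auto& in : op.inputs) {
        if (vars_should_not_low_precision.count(in)) {
          run_low_precision.erase(op.type);
          updated = true;
          break;
        }
      }
    }
  }

  // Prints "no": op_2 is no longer allowed to run at low precision.
  std::cout << "op_2 runs low precision: "
            << (run_low_precision.count("op_2") ? "yes" : "no") << std::endl;
  return 0;
}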