From 4743cc8b9a8d77ea47e08a42a16246b538bda56f Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Fri, 2 Dec 2022 16:05:24 +0800
Subject: [PATCH 1/2] [Release2.4] Revert python link prs (#48573)

* Revert "Fix mac link python (#48017)"

This reverts commit 3fa7a736e32508e797616b6344d97814c37d3ff8.

* Revert "[Cherry-pick] Fix python link error (#47811)"

This reverts commit ff642c68d6681596844b5c1bae695a81d1baf3da.

* Update config.go
---
 paddle/fluid/inference/goapi/config.go |  4 ++--
 paddle/fluid/pybind/CMakeLists.txt     | 13 -------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/inference/goapi/config.go b/paddle/fluid/inference/goapi/config.go
index 0aca2a1075fd3..d156252985eb2 100644
--- a/paddle/fluid/inference/goapi/config.go
+++ b/paddle/fluid/inference/goapi/config.go
@@ -332,9 +332,9 @@ func (config *Config) IrOptim() bool {
 /// \param useCalibMode Use TRT int8 calibration(post training
 /// quantization).
 ///
-func (config *Config) EnableTensorRtEngine(workspaceSize int32, maxBatchSize int32, minSubgraphSize int32,
+func (config *Config) EnableTensorRtEngine(workspaceSize int64, maxBatchSize int32, minSubgraphSize int32,
 	precision Precision, useStatic bool, useCalibMode bool) {
-	C.PD_ConfigEnableTensorRtEngine(config.c, C.int32_t(workspaceSize), C.int32_t(maxBatchSize), C.int32_t(minSubgraphSize), C.int32_t(precision), cvtGoBoolToPD(useStatic), cvtGoBoolToPD(useCalibMode))
+	C.PD_ConfigEnableTensorRtEngine(config.c, C.int64_t(workspaceSize), C.int32_t(maxBatchSize), C.int32_t(minSubgraphSize), C.int32_t(precision), cvtGoBoolToPD(useStatic), cvtGoBoolToPD(useCalibMode))
 }
 
 ///
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index a77659ba99d47..0a59caae2bbe8 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -262,10 +262,6 @@ if(WITH_PYTHON)
     list(APPEND OP_FUNCTION_GENERETOR_DEPS cncl_context)
   endif()
 
-  if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
-    list(APPEND OP_FUNCTION_GENERETOR_DEPS ${PYTHON_LIBRARIES})
-  endif()
-
   add_executable(op_function_generator op_function_generator.cc)
   target_link_libraries(op_function_generator ${OP_FUNCTION_GENERETOR_DEPS})
   add_executable(eager_legacy_op_function_generator
@@ -605,13 +601,4 @@ if(WITH_PYTHON)
     get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
     target_link_libraries(${SHARD_LIB_NAME} ${os_dependency_modules})
     add_dependencies(${SHARD_LIB_NAME} op_function_generator_cmd)
-
-    if(APPLE)
-      string(REGEX REPLACE ".+/(.+)" "\\1" PYTHON_LIBRARY_NAME
-                           ${PYTHON_LIBRARIES})
-      # target_link_libraries(${SHARD_LIB_NAME} "-Wl,-rpath,${PYTHON_LIBRARY_NAME}")
-    else()
-      target_link_libraries(${SHARD_LIB_NAME} ${PYTHON_LIBRARIES})
-    endif()
-
   endif()

From 0741d1bb5c72a5ae0697167ab51114e88ec4ead1 Mon Sep 17 00:00:00 2001
From: yuanlehome
Date: Wed, 21 Dec 2022 11:38:39 +0000
Subject: [PATCH 2/2] fix mixed precision inference

---
 .../framework/ir/auto_mixed_precision_pass.cc | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
index bc034301989b0..44b41a89700de 100644
--- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -437,6 +437,20 @@ void AutoMixedPrecisionPass::UpdateOpPrecision() const {
             vars_should_not_low_precision.insert(in_var_node->Var()->Name());
           }
         }
+
+        // When op_1 only supports a CPU kernel, and op_2's input var is
+        // op_1's output var, op_2 should not run at half precision.
+        if (GetOpOriginalType(op_type) != "feed" &&
+            !GpuKernelSupportPrecision(GetOpOriginalType(op_type),
+                                       phi::DataType::FLOAT32)) {
+          for (auto* out_var_node : op_node->outputs) {
+            CHECK_EQ(out_var_node->IsVar(), true);
+            if (out_var_node->Var()->Persistable()) continue;
+            if (!VarNodeHasDtype(out_var_node)) continue;
+
+            vars_should_not_low_precision.insert(out_var_node->Var()->Name());
+          }
+        }
       }
     }
   };
@@ -449,6 +463,25 @@ void AutoMixedPrecisionPass::UpdateOpPrecision() const {
       for (auto* op_node : nodes) {
         if (op_run_low_precision_.count(op_node->Op()->Type()) == 0) continue;
 
+        for (auto* in_var_node : op_node->inputs) {
+          CHECK_EQ(in_var_node->IsVar(), true);
+          if (!VarNodeHasDtype(in_var_node)) continue;
+
+          auto* real_in_var_node = real_vars_[in_var_node->Var()->Name()];
+          if (real_in_var_node->Var()->Persistable()) continue;
+
+          if (vars_should_not_low_precision.count(
+                  real_in_var_node->Var()->Name())) {
+            op_run_low_precision_.erase(op_node->Op()->Type());
+            precision_updated = true;
+            VLOG(4) << op_node->Op()->Type()
+                    << " should not run at low precision.";
+            break;
+          }
+        }
+
+        if (op_run_low_precision_.count(op_node->Op()->Type()) == 0) continue;
+
         for (auto* out_var_node : op_node->outputs) {
           CHECK_EQ(out_var_node->IsVar(), true);
           if (!VarNodeHasDtype(out_var_node)) continue;
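
Note: the rule the second patch adds can be restated as a small standalone
sketch. The Op struct, the has_gpu_fp32_kernel flag, and the container names
below are simplified stand-ins for the pass's graph/IR structures, not
Paddle's actual API; the logic only mirrors the two hunks above: outputs of
an op with no usable GPU FP32 kernel must stay FP32, and any op that reads
such a var is dropped from the low-precision set until a fixed point is
reached.

// Standalone sketch of the precision-propagation rule; all names here are
// illustrative stand-ins, not Paddle's real IR types.
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Op {
  std::string type;
  std::vector<std::string> inputs;   // input var names
  std::vector<std::string> outputs;  // output var names
  bool has_gpu_fp32_kernel;          // stand-in for GpuKernelSupportPrecision
};

int main() {
  // op_1 has no GPU kernel, so its output must stay FP32; op_2 consumes it.
  std::vector<Op> ops = {
      {"op_1", {"x"}, {"y"}, /*has_gpu_fp32_kernel=*/false},
      {"op_2", {"y"}, {"z"}, /*has_gpu_fp32_kernel=*/true},
  };

  std::set<std::string> run_low_precision = {"op_2"};
  std::set<std::string> vars_should_not_low_precision;

  // Mirrors the first hunk: mark outputs of CPU-only ops as "keep FP32".
  for (const auto& op : ops) {
    if (!op.has_gpu_fp32_kernel) {
      for (const auto& out : op.outputs) {
        vars_should_not_low_precision.insert(out);
      }
    }
  }

  // Mirrors the second hunk: iterate to a fixed point, removing from the
  // low-precision set any op whose input var was marked "keep FP32".
  bool updated = true;
  while (updated) {
    updated = false;
    for (const auto& op : ops) {
      if (run_low_precision.count(op.type) == 0) continue;
      for (const auto& in : op.inputs) {
        if (vars_should_not_low_precision.count(in)) {
          run_low_precision.erase(op.type);
          updated = true;
          break;
        }
      }
    }
  }

  // Prints "no": op_2 is no longer allowed to run at low precision.
  std::cout << "op_2 runs low precision: "
            << (run_low_precision.count("op_2") ? "yes" : "no") << std::endl;
  return 0;
}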