update #23

Merged 21 commits on Aug 4, 2021

Commits (21):

af88699  Revert "[NPU] refine nan check (#34508)" (#34530)  (gongweibao, Aug 2, 2021)
145cdb5  Add basic functions of Program Pass (#34524)  (sneaxiy, Aug 2, 2021)
3b5fc2a  Change formula error in paddle.optimizer (#34539)  (sunzhongkai588, Aug 2, 2021)
9e0bb91  [HybridParallel]Support 1f1b for PipelineParallel (#34483)  (ForFishes, Aug 2, 2021)
61e51c1  polish sccahce (#34350)  (zhwesky2010, Aug 3, 2021)
e7dcdb7  fix attr can not find in mkldnn, test=develop (#34567)  (wanghuancoder, Aug 3, 2021)
2aedf16  support more dim for mul op npu (#34546)  (houj04, Aug 3, 2021)
2d0f3d9  support Kunlun2 (#34459)  (QingshuChen, Aug 3, 2021)
d7493df  [NPU] Support npu op reciprocal and reciprocal grad (#34531)  (limin2021, Aug 3, 2021)
45fa14f  add trt fp32 multi-thread tests (#34575)  (OliverLPH, Aug 3, 2021)
2714fc7  [docker] delete install_trt in release docker (#34266)  (pangyoki, Aug 3, 2021)
9b6c7eb  [HybridParallel] Support segment for PipelineParallel (#34529)  (ForFishes, Aug 3, 2021)
56b7ebb  [hybrid] remove the using of global ring in hybrid parallel (#34525)  (wangxicoding, Aug 3, 2021)
c79fa1c  Set Tensor Core MathType for bfloat16 in conv using cudnn (#34409)  (AshburnLee, Aug 4, 2021)
1f0f5d3  supplement the function of slice. (#34172)  (hbwx24, Aug 4, 2021)
54b6c39  fix API bug of Tensor.cuda (#34416)  (zhwesky2010, Aug 4, 2021)
a7c3836  Fix backward bug (#34582)  (MingMingShangTian, Aug 4, 2021)
3ce14a3  Support npu kernel for Crop op (#34542)  (yeliang2258, Aug 4, 2021)
f39c3a5  [NPU] Support npu kernel for assign_value op (#34568)  (betterpig, Aug 4, 2021)
ee60e82  Update benchmark-cpu (#34538)  (tianshuo78520a, Aug 4, 2021)
91be876  support set BUILD_DIR for windows CI (#34595)  (zhwesky2010, Aug 4, 2021)

Files changed:

cmake/external/xpu.cmake (1 addition, 1 deletion)

@@ -35,7 +35,7 @@ ELSE ()
 ENDIF()

 SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701")
+SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729")
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)

paddle/fluid/framework/CMakeLists.txt (6 additions, 1 deletion)

@@ -188,8 +188,13 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place)

 cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference)

+IF(WITH_XPU)
+cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
+    shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils)
+ELSE()
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
     shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils)
+ENDIF()

 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
 cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context)

@@ -405,7 +410,7 @@ configure_file(commit.h.in commit.h)
 # Adapt to custom op mechanism: Include the header files related to the data type
 # to avoid exposing the path of the underlying file
 include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../extension/include)
+include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include)

 if(WITH_ROCM)
   hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)

paddle/fluid/framework/details/multi_devices_helper.h (0 additions, 4 deletions)

@@ -77,10 +77,6 @@ typedef std::vector<std::pair<std::string, std::string>> ParamsAndGrads;
 constexpr char kParamsAndDenseGrads[] = "params_and_dense_grads";
 constexpr char kParamsAndSparseGrads[] = "params_and_sparse_grads";

-typedef std::vector<ProgramDesc> ProgramDescs;
-constexpr char kProgramDescs[] = "program_descs";
-constexpr char kStartupProgramDescs[] = "startup_program_descs";
-
 typedef std::unordered_set<std::string> PinnedVars;
 constexpr char kPinnedVars[] = "pinned_vars";

paddle/fluid/framework/ir/graph_helper.cc (113 additions, 0 deletions)

@@ -15,7 +15,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include <queue>
 #include <stack>
+#include "paddle/fluid/framework/op_proto_maker.h"

+DECLARE_bool(convert_all_blocks);
 DEFINE_string(print_sub_graph_dir, "",
               "FLAGS_print_sub_graph_dir is used "
               "to print the nodes of sub_graphs.");

@@ -431,6 +433,117 @@ std::vector<ir::Node *> TopologySortGraphByDescOrder(const Graph &graph) {
   return ret;
 }

+static OpDesc *ReplaceScaleLossGradOp(const Node &node, OpDesc *desc) {
+  desc->SetType("fill_constant");
+  desc->SetAttr(
+      OpProtoAndCheckerMaker::OpRoleAttrName(),
+      (static_cast<int>(OpRole::kBackward) | static_cast<int>(OpRole::kLoss)));
+  desc->SetAttr("value", 1.0f);
+  std::vector<std::string> output_names;
+  for (auto out : node.outputs) {
+    output_names.emplace_back(out->Name());
+  }
+  desc->SetOutput("Out", output_names);
+  return desc;
+}
+
+static void GetGraphOpDesc(const std::vector<Node *> &nodes,
+                           std::vector<OpDesc> *ops) {
+  for (Node *n : nodes) {
+    // if node is not Op, skip
+    if (!n->IsOp()) continue;
+
+    // create fill_constant op
+    if (n->Name() == "scale_loss_grad") {
+      ops->emplace_back();
+      auto &desc = ops->back();
+      ReplaceScaleLossGradOp(*n, &desc);
+    } else if (n->Op()) {
+      ops->emplace_back(*n->Op());
+    }
+    // delete no OpDesc op
+  }
+}
+
+static void GraphToBlock(const Graph &graph, proto::BlockDesc *block,
+                         const SortKind *sort_kind) {
+  // Remove the unneeded variables after memory optimization.
+  std::unordered_set<std::string> vars2remove;
+  if (graph.Has(kGraphToProgramVarsToRemove)) {
+    vars2remove =
+        graph.Get<std::unordered_set<std::string>>(kGraphToProgramVarsToRemove);
+    VLOG(2) << "graph (id: " << block->idx() << ") to program remove "
+            << vars2remove.size() << " nodes";
+  }
+
+  block->clear_vars();
+  std::unordered_set<std::string> visited_vars;
+  for (Node *n : graph.Nodes()) {
+    if (n->IsVar()) {
+      if (n->Var() && visited_vars.count(n->Var()->Name()) == 0 &&
+          !vars2remove.count(n->Var()->Name()) &&
+          n->GetVarNodeBlockId() == graph.GetBlockId()) {
+        visited_vars.insert(n->Var()->Name());
+        block->add_vars()->MergeFrom(*n->Var()->Proto());
+      }
+    }
+  }
+  block->clear_ops();
+
+  std::vector<Node *> nodes;
+  if (sort_kind != nullptr) {
+    // Inference Memory Optimize relays on this branch.
+    nodes = TopologyVarientSort(graph, *sort_kind);
+  } else {
+    if (FLAGS_convert_all_blocks) {
+      nodes = TopologySortGraphByDescOrder(graph);
+    } else {
+      nodes = TopologySortOperations(graph);
+    }
+  }
+
+  std::vector<OpDesc> ops;
+  GetGraphOpDesc(nodes, &ops);
+  for (auto &op : ops) {
+    block->add_ops()->MergeFrom(*op.Proto());
+  }
+}
+
+void GraphToProgram(const Graph &graph, ProgramDesc *program,
+                    const SortKind *sort_kind) {
+  PADDLE_ENFORCE_EQ(graph.IsMainGraph(), true,
+                    platform::errors::InvalidArgument(
+                        "This graph is a sub_graph, "
+                        "and can't convert to program individually"));
+  PADDLE_ENFORCE_NOT_NULL(
+      program,
+      platform::errors::InvalidArgument(
+          "program must not be nullptr when converting graph to program"));
+
+  proto::ProgramDesc program_pb(*(program->Proto()));
+  auto block = program_pb.mutable_blocks(kRootBlockIndex);
+  block->set_idx(kRootBlockIndex);
+
+  if (FLAGS_convert_all_blocks) {
+    GraphToBlock(*graph.GetSubGraph(kRootBlockIndex), block, sort_kind);
+
+    VLOG(3) << "Graph to program need convert " << graph.SubGraphsSize()
+            << " sub graph";
+    for (size_t idx = 0; idx < graph.SubGraphsSize(); ++idx) {
+      // avoid kRootBlockIndex not 0
+      if (idx == kRootBlockIndex) continue;
+
+      block = program_pb.add_blocks();
+      block->set_idx(idx);
+      GraphToBlock(*graph.GetSubGraph(idx), block, sort_kind);
+    }
+  } else {
+    GraphToBlock(graph, block, sort_kind);
+  }
+
+  program->CopyFrom(program_pb);
+}
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
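
All of the above is new code: the PR lifts ReplaceScaleLossGradOp, GetGraphOpDesc, and GraphToBlock out of GraphToProgramPass and exposes GraphToProgram as a free function, so any holder of a Graph can serialize it back to a ProgramDesc without constructing a pass. A minimal sketch of the resulting round trip, mirroring the call sequence the new Pass::Apply overload uses in pass.cc below; RoundTrip itself is illustrative and not part of the PR:

```cpp
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/program_desc.h"

namespace fw = paddle::framework;

// Hypothetical helper: ProgramDesc -> Graph -> ProgramDesc round trip.
void RoundTrip(fw::ProgramDesc* program) {
  fw::ir::Graph graph(*program);  // build the IR graph from the program
  // ... mutate the graph here (run passes, edit nodes, etc.) ...
  fw::ProgramDesc converted;
  fw::ir::GraphToProgram(graph, &converted);  // sort_kind defaults to nullptr
  program->CopyFrom(*converted.Proto());      // write the result back
}
```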

paddle/fluid/framework/ir/graph_helper.h (7 additions, 0 deletions)

@@ -27,6 +27,10 @@ namespace paddle {
 namespace framework {
 namespace ir {

+constexpr char kGraphToProgramVarsToRemove[] =
+    "__graph_to_program_vars_to_remove__";
+constexpr char kGraphToProgramSortKind[] = "__graph_to_program_sort_kind__";
+
 // Compare nodes via node id.
 class Graph;

@@ -117,6 +121,9 @@ std::vector<T *> FilterByNodeWrapper(const Graph &graph) {

 std::vector<ir::Node *> TopologySortGraphByDescOrder(const Graph &graph);

+void GraphToProgram(const Graph &graph, ProgramDesc *p_program,
+                    const SortKind *sort_kind = nullptr);
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle

paddle/fluid/framework/ir/graph_to_program_pass.cc (4 additions, 111 deletions)

@@ -17,11 +17,8 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include <algorithm>

 #include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/op_proto_maker.h"

-DECLARE_bool(convert_all_blocks);
-
 namespace paddle {
 namespace framework {
 class ProgramDesc;

@@ -33,116 +30,12 @@ namespace framework {
 namespace ir {

 void GraphToProgramPass::ApplyImpl(ir::Graph* graph) const {
-  PADDLE_ENFORCE_EQ(graph->IsMainGraph(), true,
-                    platform::errors::InvalidArgument(
-                        "This graph is a sub_graph, "
-                        "and can't convert to program individually"));
-
-  ProgramDesc& program = Get<ProgramDesc>("program");
-
-  std::unique_ptr<proto::ProgramDesc> program_pb(
-      new proto::ProgramDesc(*program.Proto()));
-
-  auto block = program_pb->mutable_blocks(kRootBlockIndex);
-  block->set_idx(kRootBlockIndex);
-
-  if (FLAGS_convert_all_blocks) {
-    GraphToBlock(graph->GetSubGraph(kRootBlockIndex), block);
-
-    VLOG(3) << "Graph to program need convert " << graph->SubGraphsSize()
-            << " sub graph";
-    for (size_t idx = 0; idx < graph->SubGraphsSize(); ++idx) {
-      // avoid kRootBlockIndex not 0
-      if (idx == kRootBlockIndex) continue;
-
-      block = program_pb->add_blocks();
-      block->set_idx(idx);
-      GraphToBlock(graph->GetSubGraph(idx), block);
-    }
-  } else {
-    GraphToBlock(graph, block);
-  }
-
-  program.CopyFrom(*program_pb);
-}
-
-OpDesc* ReplaceScaleLossGradOp(ir::Node* node, OpDesc* desc) {
-  desc->SetType("fill_constant");
-  desc->SetAttr(
-      OpProtoAndCheckerMaker::OpRoleAttrName(),
-      (static_cast<int>(OpRole::kBackward) | static_cast<int>(OpRole::kLoss)));
-  desc->SetAttr("value", 1.0f);
-  std::vector<std::string> output_names;
-  for (auto out : node->outputs) {
-    output_names.emplace_back(out->Name());
-  }
-  desc->SetOutput("Out", output_names);
-  return desc;
-}
-
-std::vector<OpDesc>* GetGraphOpDesc(const std::vector<ir::Node*>& nodes,
-                                    std::vector<OpDesc>* ops) {
-  for (ir::Node* n : nodes) {
-    // if node is not Op, skip
-    if (!n->IsOp()) continue;
-
-    // create fill_constant op
-    if (n->Name() == "scale_loss_grad") {
-      ops->emplace_back();
-      auto& desc = ops->back();
-      ReplaceScaleLossGradOp(n, &desc);
-    } else if (n->Op()) {
-      ops->emplace_back(*n->Op());
-    } else {
-      // delete no OpDesc op
-    }
-  }
-  return ops;
-}
-
-void GraphToProgramPass::GraphToBlock(const Graph* graph,
-                                      proto::BlockDesc* block) const {
-  // Remove the unneeded variables after memory optimization.
-  std::unordered_set<std::string> vars2remove;
-  if (graph->Has(kGraphToProgramVarsToRemove)) {
-    vars2remove = graph->Get<std::unordered_set<std::string>>(
-        kGraphToProgramVarsToRemove);
-    VLOG(2) << "graph (id: " << block->idx() << ") to program remove "
-            << vars2remove.size() << " nodes";
-  }
-
-  block->clear_vars();
-  std::unordered_set<std::string> visited_vars;
-  for (ir::Node* n : graph->Nodes()) {
-    if (n->IsVar()) {
-      if (n->Var() && visited_vars.count(n->Var()->Name()) == 0 &&
-          !vars2remove.count(n->Var()->Name()) &&
-          n->GetVarNodeBlockId() == graph->GetBlockId()) {
-        visited_vars.insert(n->Var()->Name());
-        block->add_vars()->MergeFrom(*n->Var()->Proto());
-      }
-    }
-  }
-  block->clear_ops();
-
-  std::vector<ir::Node*> nodes;
+  auto& program = Get<ProgramDesc>("program");
   if (Has(kGraphToProgramSortKind)) {
     // Inference Memory Optimize relays on this branch.
-    int sort_kind = Get<int>(kGraphToProgramSortKind);
-    nodes = TopologyVarientSort(
-        *graph, static_cast<framework::ir::SortKind>(sort_kind));
+    auto sort_kind = static_cast<SortKind>(Get<int>(kGraphToProgramSortKind));
+    GraphToProgram(*graph, &program, &sort_kind);
   } else {
-    if (FLAGS_convert_all_blocks) {
-      nodes = TopologySortGraphByDescOrder(*graph);
-    } else {
-      nodes = TopologySortOperations(*graph);
-    }
-  }
-
-  std::vector<OpDesc> ops;
-  GetGraphOpDesc(nodes, &ops);
-  for (auto& op : ops) {
-    block->add_ops()->MergeFrom(*op.Proto());
+    GraphToProgram(*graph, &program, nullptr);
   }
 }
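
With the helpers relocated, ApplyImpl reduces to reading its "program" attribute and the optional sort-kind attribute, then forwarding to ir::GraphToProgram. A hedged sketch of driving the pass through the registry: the attribute names come from this diff, while PassRegistry::Get, SetNotOwned, and USE_PASS are the surrounding Pass API of this era, so verify them against your tree. ConvertGraph is illustrative, not part of the PR:

```cpp
#include <string>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/program_desc.h"

// Pull in the REGISTER_PASS from graph_to_program_pass.cc.
USE_PASS(graph_to_program_pass);

namespace fw = paddle::framework;

// Illustrative helper: convert a Graph back to a ProgramDesc via the pass.
fw::ProgramDesc ConvertGraph(fw::ir::Graph* graph) {
  auto pass = fw::ir::PassRegistry::Instance().Get("graph_to_program_pass");
  fw::ProgramDesc result;
  // ApplyImpl reads the "program" attribute and fills it from the graph.
  pass->SetNotOwned<fw::ProgramDesc>("program", &result);
  // Optional: pick a topology sort, e.g. for inference memory optimization.
  // pass->Set(fw::ir::kGraphToProgramSortKind, new int(0));
  pass->Apply(graph);
  return result;
}
```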

paddle/fluid/framework/ir/graph_to_program_pass.h (1 addition, 7 deletions)

@@ -14,6 +14,7 @@ limitations under the License. */

 #pragma once

+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/pass.h"

 namespace paddle {

@@ -22,16 +23,9 @@ namespace ir {

 class Graph;

-const char kGraphToProgramVarsToRemove[] =
-    "__graph_to_program_vars_to_remove__";
-const char kGraphToProgramSortKind[] = "__graph_to_program_sort_kind__";
-
 class GraphToProgramPass : public Pass {
  protected:
   void ApplyImpl(ir::Graph* graph) const override;
-
- private:
-  void GraphToBlock(const Graph* graph, proto::BlockDesc* block) const;
 };

 }  // namespace ir

paddle/fluid/framework/ir/pass.cc (20 additions, 0 deletions)

@@ -69,6 +69,26 @@ Graph* Pass::Apply(Graph* graph) const {
   return graph;
 }

+void Pass::Apply(ProgramDesc* main_program,
+                 ProgramDesc* startup_program) const {
+  PADDLE_ENFORCE_NOT_NULL(main_program, platform::errors::InvalidArgument(
+                                            "main program must be provided"));
+  PADDLE_ENFORCE_NOT_NULL(
+      startup_program,
+      platform::errors::InvalidArgument("startup program must be provided"));
+
+  Graph graph(*main_program);
+  Apply(&graph);
+
+  // TODO(zjl): support details::kStartupProgramDescs and details::kProgramDescs
+  ProgramDesc new_main_program;
+  GraphToProgram(graph, &new_main_program);
+  main_program->CopyFrom(*new_main_program.Proto());
+
+  startup_program->Flush();
+  main_program->Flush();
+}
+
 PassRegistry& PassRegistry::Instance() {
   static PassRegistry g_pass_info_map;
   return g_pass_info_map;
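
This overload is the core of commit 145cdb5 ("Add basic functions of Program Pass"): it wraps the Graph round trip so a registered pass can be applied directly to a program pair. Per the TODO, startup_program is currently only flushed, not rewritten. A hedged usage sketch; RunProgramPass is illustrative, and the registry call is the standard Pass API rather than anything added here:

```cpp
#include <string>

#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/program_desc.h"

namespace fw = paddle::framework;

// Illustrative: apply one registered pass to main/startup programs.
void RunProgramPass(const std::string& pass_name,
                    fw::ProgramDesc* main_program,
                    fw::ProgramDesc* startup_program) {
  auto pass = fw::ir::PassRegistry::Instance().Get(pass_name);
  // Internally: Graph graph(*main_program); Apply(&graph);
  // GraphToProgram(graph, &new_main); CopyFrom; then both programs Flush.
  pass->Apply(main_program, startup_program);
}
```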