diff --git a/paddle/fluid/framework/ipu/compiler.cc b/paddle/fluid/framework/ipu/compiler.cc
index 0480adfb6d433..8de89293c1fcc 100644
--- a/paddle/fluid/framework/ipu/compiler.cc
+++ b/paddle/fluid/framework/ipu/compiler.cc
@@ -11,106 +11,194 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
 #include "paddle/fluid/framework/ipu/compiler.h"
+#include "paddle/fluid/framework/ipu/ipu_utils.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+
 namespace paddle {
 namespace framework {
 namespace ipu {
 
-Compiler::Compiler(const IpuStrategy* ipu_strategy) {
-  ipu_strategy_ = ipu_strategy;
+template <typename T>
+T GetAttrAllowNull(std::string attr, OpDesc* op_desc) {
+  std::string type = typeid(T).name();
+  VLOG(10) << "body attr type is: " << type << " body attr name is: " << attr;
+  if (op_desc->HasAttr(attr)) {
+    return BOOST_GET_CONST(T, op_desc->GetAttr(attr));
+  } else {
+    VLOG(10) << "body attr not exist: " << type;
+    return {};
+  }
+}
+
+Compiler::Compiler() {
   builder_ = popart::Builder::create();
   RegisterOpFunc();
 }
 
-Compiler::~Compiler() {
-  builder_.release();
-  tensors_.clear();
-  ipu_strategy_ = nullptr;
-}
+Compiler::~Compiler() = default;
 
-void Compiler::InsertTensors(std::vector<std::string> output_names,
-                             std::vector<std::string> tensor_ids) {
-  for (int i = 0; i < tensor_ids.size(); i++) {
-    std::string tensor_id = tensor_ids[i];
-    tensors_.emplace(output_names[i], tensor_ids[i]);
-  }
-}
+void Compiler::RegisterOpFunc() {
+  VLOG(10) << "enter Compiler::RegisterOpFunc";
+#define INT_VEC std::vector<std::int64_t>
+#define FLOAT_VEC std::vector<float>
+#define FLOAT float
+#define INT std::int64_t
+#define BOOL bool
+#define STRING std::string
+#define STRING_VEC std::vector<std::string>
+#define NONE
 
-void Compiler::InsertTensors(std::vector<std::string> output_names,
-                             std::string tensor_id) {
-  tensors_.insert(
-      std::pair<std::string, std::string>(output_names[0], tensor_id));
-}
+#define ARG(Type, Name) , GetAttrAllowNull<Type>(#Name, op_desc)
+#define POPART_CONST_ARG(Name) , const PopartConstant& Name
+#define HOST_SIDE_CONST_ARG(Name) , const HostSideConstant& Name
+#define POPART_ATTRIB_VEC_ARG(Name)
+#define BODY_ARG(Name) NONE
 
-void Compiler::SetIpuIndexStage(const std::vector<std::string>& tensor_ids,
-                                const OpDesc* op_desc) {
-  // TODO(xiaobingw): replace ipu_index with macro or constexpr
-  VLOG(10) << "enter Compiler::SetIpuIndexStage";
-  auto tensor_ids_set =
-      std::set<std::string>(tensor_ids.begin(), tensor_ids.end());
-  if (op_desc->HasAttr("ipu_index")) {
-    auto ipu_index = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_index"));
-    builder_->virtualGraph(tensor_ids_set, ipu_index);
-    VLOG(10) << "set ipu_index= " << ipu_index
-             << " for op: " << op_desc->Type();
-    if (op_desc->HasAttr("ipu_stage")) {
-      auto ipu_stage = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_stage"));
-      builder_->pipelineStage(tensor_ids_set, ipu_stage);
-      VLOG(10) << "set ipu_stage= " << ipu_stage
-               << " for op: " << op_desc->Type();
-    }
-  }
-  VLOG(10) << "leave Compiler::SetIpuIndexStage";
-}
+  name_function_ = {
+#define OP_DECL(FuncName, OnnxImpl, Args)                      \
+  {#FuncName, [&](OpDesc* op_desc) {                           \
+     auto op_type = op_desc->Type();                           \
+     VLOG(10) << "build op:" << op_type << " args " << #Args;  \
+     auto inputs = GetOpInputs(op_desc);                       \
+     auto output_names = GetOpOutputs(op_desc);                \
+     auto aiOnnxOpset1 = builder_->aiGraphcoreOpset1();        \
+     auto aiOnnxOpset = builder_->aiOnnxOpset11();             \
+     auto output_ids = OnnxImpl(inputs Args);                  \
+     SetIpuIndexStage(output_ids, op_desc);                    \
+     InsertTensors(output_names, output_ids);                  \
+   }},  // NOLINT
+#include "paddle/fluid/framework/ipu/supported_ops_autogen.h"
+  };
 
-void Compiler::SetIpuIndexStage(const std::string& tensor_id,
-                                const OpDesc* op_desc) {
-  VLOG(10) << "enter Compiler::SetIpuIndexStage";
-  if (op_desc->HasAttr("ipu_index")) {
-    auto ipu_index = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_index"));
-    builder_->virtualGraph(tensor_id, ipu_index);
-    VLOG(10) << "set ipu_index= " << ipu_index
-             << " for op: " << op_desc->Type();
-    if (op_desc->HasAttr("ipu_stage")) {
-      auto ipu_stage = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_stage"));
-      builder_->pipelineStage(tensor_id, ipu_stage);
-      VLOG(10) << "set ipu_stage= " << ipu_stage
-               << " for op: " << op_desc->Type();
-    }
-  }
-  VLOG(10) << "leave Compiler::SetIpuIndexStage";
+#undef OP_DECL
+// #undef OP_DECL_NO_RETURN
+#undef BODY_ARG
+#undef POPART_ATTRIB_VEC_ARG
+#undef HOST_SIDE_CONST_ARG
+#undef POPART_CONST_ARG
+#undef ARG
+#undef NONE
+#undef STRING_VEC
+#undef STRING
+#undef BOOL
+#undef INT
+#undef FLOAT
+#undef FLOAT_VEC
+#undef INT_VEC
+
+  // // self register ops
+  // #include "paddle/fluid/framework/ipu/supported_ops_custom.h"
+  // name_function_.emplace("popart_reducemean", ReduceMeanHandler);
+  // name_function_.emplace("popart_batchnormalization", BatchNormHandler);
+  // name_function_.emplace("popart_constant", Constant);
+  // name_function_.emplace("popart_nllloss", NllLoss);
+  // name_function_.emplace("popart_groupnormalization", Groupnormalization);
+
+  // // used for debug
+  // for (auto elem : name_function_) {
+  //   VLOG(10) << "registered in map : " << elem.first << " second "
+  //            << &(elem.second);
+  // }
 }
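For readers unfamiliar with this X-macro pattern: each OP_DECL row in supported_ops_autogen.h expands into one entry of `name_function_`. A sketch of what the `popart_gelu` row turns into (illustrative, not the literal preprocessor output):

    // Sketch: expansion of OP_DECL(popart_gelu, aiOnnxOpset1.gelu, NONE).
    // NONE expands to nothing, so the op is built from its inputs alone.
    {"popart_gelu", [&](OpDesc* op_desc) {
       auto op_type = op_desc->Type();
       VLOG(10) << "build op:" << op_type << " args " << "NONE";
       auto inputs = GetOpInputs(op_desc);
       auto output_names = GetOpOutputs(op_desc);
       auto aiOnnxOpset1 = builder_->aiGraphcoreOpset1();
       auto aiOnnxOpset = builder_->aiOnnxOpset11();
       auto output_ids = aiOnnxOpset1.gelu(inputs);  // OnnxImpl(inputs Args)
       SetIpuIndexStage(output_ids, op_desc);
       InsertTensors(output_names, output_ids);
     }},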
 
-template <typename T>
-T GetAttrAllowNull(std::string attr, OpDesc* op_desc) {
-  std::string type = typeid(T).name();
-  VLOG(1) << "body attr type is: " << type << " body attr name is: " << attr;
-  if (op_desc->HasAttr(attr)) {
-    return BOOST_GET_CONST(T, op_desc->GetAttr(attr));
-  } else {
-    VLOG(1) << "body attr not exist: " << type;
-    return {};
+void Compiler::LowerBody(const ir::Graph* graph) {
+  VLOG(10) << "enter Compiler::LowerBody";
+  auto nodes = ir::TopologySortOperations(*graph);
+  for (auto* node : nodes) {
+    auto* op_desc = node->Op();
+    auto op_type = op_desc->Type();
+    VLOG(10) << "node->type: " << op_type;
+
+    auto itr = name_function_.find(op_type);
+    if (itr != name_function_.end()) {
+      itr->second(node->Op());
+    } else if (op_type == "popart_constant") {
+      auto dims =
+          BOOST_GET_CONST(std::vector<int64_t>, op_desc->GetAttr("dims"));
+      auto dtype_ = BOOST_GET_CONST(int, op_desc->GetAttr("dtype"));
+      auto dtype = OnnxDtype2PopartType(dtype_);
+      popart::TensorInfo tensor_info{dtype, dims};
+      auto value_attr = op_desc->GetAttr("value");
+      auto const_data = std::unique_ptr<popart::ConstVoidData>{};
+      switch (dtype) {
+        case popart::DataType::FLOAT:
+          const_data.reset(new popart::ConstVoidData(
+              BOOST_GET_CONST(std::vector<float>, value_attr).data(),
+              tensor_info));
+          break;
+        case popart::DataType::INT32:
+          const_data.reset(new popart::ConstVoidData(
+              BOOST_GET_CONST(std::vector<int>, value_attr).data(),
+              tensor_info));
+          break;
+        case popart::DataType::DOUBLE:
+          const_data.reset(new popart::ConstVoidData(
+              BOOST_GET_CONST(std::vector<double>, value_attr).data(),
+              tensor_info));
+          break;
+        case popart::DataType::INT64:
+          const_data.reset(new popart::ConstVoidData(
+              BOOST_GET_CONST(std::vector<int64_t>, value_attr).data(),
+              tensor_info));
+          break;
+        default:
+          PADDLE_THROW(
+              platform::errors::Unimplemented("popart::DataType %d", dtype));
+      }
+      popart::TensorId result =
+          builder_->aiOnnxOpset11().constant(*const_data);
+      SetIpuIndexStage(result, op_desc);
+      InsertTensors(GetOpOutputs(op_desc), result);
+    } else if (op_type == "popart_reducemean") {
+      auto inputs = GetOpInputs(op_desc);
+      auto axes = nonstd::optional<std::vector<int64_t>>();
+      if (op_desc->HasAttr("axes")) {
+        axes = BOOST_GET_CONST(std::vector<int64_t>, op_desc->GetAttr("axes"));
+      }
+      auto keepdims = BOOST_GET_CONST(int64_t, op_desc->GetAttr("keepdims"));
+      popart::TensorId result =
+          builder_->aiOnnxOpset11().reducemean(inputs, axes, keepdims);
+      SetIpuIndexStage(result, op_desc);
+      InsertTensors(GetOpOutputs(op_desc), result);
+    } else if (op_type == "popart_batchnormalization") {
+      auto inputs = GetOpInputs(op_desc);
+      auto outputs = GetOpOutputs(op_desc);
+      auto num_outputs = outputs.size();
+      auto epsilon = BOOST_GET_CONST(float, op_desc->GetAttr("epsilon"));
+      auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum"));
+      auto result = builder_->aiOnnxOpset11().batchnormalization(
+          inputs, num_outputs, epsilon, momentum);
+      SetIpuIndexStage(result, op_desc);
+      InsertTensors(GetOpOutputs(op_desc), result);
+    } else if (op_type == "popart_nllloss") {
+      auto inputs = GetOpInputs(op_desc);
+      auto ignoreIndex = BOOST_GET_CONST(int, op_desc->GetAttr("ignoreIndex"));
+      auto result = builder_->aiGraphcoreOpset1().nllloss(
+          inputs, popart::ReductionType::NoReduction, ignoreIndex);
+      SetIpuIndexStage(result, op_desc);
+      InsertTensors(GetOpOutputs(op_desc), result);
+    } else {
+      PADDLE_THROW(
+          platform::errors::NotFound("%s is not registered", op_type));
+    }
   }
+  VLOG(10) << "leave Compiler::LowerBody";
 }
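The `popart_constant` branch above hand-builds a popart::ConstVoidData because the value lives in an OpDesc attribute rather than in a graph tensor. A minimal standalone sketch of the same pattern (the shape and values here are made up):

    // Sketch: adding a 1-D float constant through the popart Builder.
    std::vector<int64_t> dims{3};
    std::vector<float> values{1.0f, 2.0f, 3.0f};
    popart::TensorInfo tensor_info{popart::DataType::FLOAT, dims};
    popart::ConstVoidData const_data{values.data(), tensor_info};
    popart::TensorId out = builder_->aiOnnxOpset11().constant(const_data);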
 
 void Compiler::InitInputs(ir::Graph* graph,
                           const std::vector<std::string>& feed_list) {
   for (const auto& feed_name : feed_list) {
-    VLOG(1) << feed_name;
-
     for (const ir::Node* n : graph->Nodes()) {
       if (n->IsVar()) {
         auto* var_desc = n->Var();
         if (feed_name == var_desc->Name()) {
-          // Get tensor_info from var_desc
-          VLOG(1) << "feed_name= " << var_desc->Name();
+          VLOG(10) << "feed_name= " << var_desc->Name();
           auto data_type = VarType2PopartType(var_desc->GetDataType());
           popart::TensorInfo input_info{data_type, var_desc->GetShape()};
-          // Create popart tensor
-          VLOG(1) << "popart input_info = " << input_info;
+          VLOG(10) << "popart input_info = " << input_info;
           popart::TensorId tensor_id = builder_->addInputTensor(input_info);
-          VLOG(1) << "popart input tensor id = " << tensor_id;
+          VLOG(10) << "popart input tensor id = " << tensor_id;
           inputs_.push_back(tensor_id);
           tensors_.emplace(var_desc->Name(), tensor_id);
         }
@@ -125,8 +213,8 @@ void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
     PADDLE_ENFORCE_NE(tensor, tensors_.end(),
                       platform::errors::NotFound(
                           "output tensor %s does not exist.", fetch_name));
-    VLOG(1) << "fetch_name= " << fetch_name;
-    VLOG(1) << "popart output tensor id = " << tensor->second;
+    VLOG(10) << "fetch_name= " << fetch_name;
+    VLOG(10) << "popart output tensor id = " << tensor->second;
     builder_->addOutputTensor(tensor->second);
     outputs_.push_back(tensor->second);
   }
@@ -136,8 +224,7 @@ void Compiler::LowerWeights(const ir::Graph* graph, const Scope* scope_) {
   PADDLE_ENFORCE_NOT_NULL(scope_,
                           platform::errors::PreconditionNotMet(
                               "You should call set_scope before LowerWeights"));
-  // at this step, i think the graph doesn't contains optimizer
-  // related states
+  // at this step, the graph doesn't contain optimizer related states
   for (const auto* node : graph->Nodes()) {
     if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
       if (node->Var()->Persistable()) {
@@ -161,98 +248,94 @@ void Compiler::LowerWeights(const ir::Graph* graph, const Scope* scope_) {
     }
   }
 }
 
-std::vector<std::string> Compiler::GetOpInputs(const OpDesc* op) {
-  auto inputs_ = op->Input("__inputs__");
-  std::vector<std::string> inputs;
-  for (const auto& in : inputs_) {
-    if (tensors_.find(in) != tensors_.end()) {
-      inputs.push_back(tensors_[in]);
-    } else {
-      inputs.push_back(in);
+void Compiler::InsertTensors(const std::vector<std::string>& output_names,
+                             const std::vector<std::string>& tensor_ids) {
+  PADDLE_ENFORCE_EQ(output_names.size(), tensor_ids.size(),
+                    platform::errors::Fatal("InsertTensors size mismatch"));
+  for (size_t i = 0; i < tensor_ids.size(); i++) {
+    tensors_.emplace(output_names[i], tensor_ids[i]);
+  }
+}
+
+void Compiler::InsertTensors(const std::vector<std::string>& output_names,
+                             const std::string& tensor_id) {
+  PADDLE_ENFORCE_EQ(output_names.size(), 1,
+                    platform::errors::Fatal("InsertTensors size mismatch"));
+  tensors_.emplace(output_names[0], tensor_id);
+}
+
+void Compiler::SetIpuIndexStage(const std::vector<std::string>& tensor_ids,
+                                const OpDesc* op_desc) {
+  // TODO(xiaobingw): replace ipu_index with macro or constexpr
+  VLOG(10) << "enter Compiler::SetIpuIndexStage";
+  auto tensor_ids_set =
+      std::set<std::string>(tensor_ids.begin(), tensor_ids.end());
+  if (op_desc->HasAttr("ipu_index")) {
+    auto ipu_index = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_index"));
+    builder_->virtualGraph(tensor_ids_set, ipu_index);
+    VLOG(10) << "set ipu_index= " << ipu_index
+             << " for op: " << op_desc->Type();
+    if (op_desc->HasAttr("ipu_stage")) {
+      auto ipu_stage = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_stage"));
+      builder_->pipelineStage(tensor_ids_set, ipu_stage);
+      VLOG(10) << "set ipu_stage= " << ipu_stage
+               << " for op: " << op_desc->Type();
     }
   }
-  return inputs;
+  VLOG(10) << "leave Compiler::SetIpuIndexStage";
 }
 
-void Compiler::RegisterOpFunc() {
-  VLOG(1) << "enter Compiler::RegisterOpFunc";
-#define INT_VEC std::vector<std::int64_t>
-#define FLOAT_VEC std::vector<float>
-#define FLOAT float
-#define INT std::int64_t
-#define BOOL bool
-#define STRING std::string
-#define STRING_VEC std::vector<std::string>
-#define NONE
+void Compiler::SetIpuIndexStage(const std::string& tensor_id,
+                                const OpDesc* op_desc) {
+  VLOG(10) << "enter Compiler::SetIpuIndexStage";
+  if (op_desc->HasAttr("ipu_index")) {
+    auto ipu_index = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_index"));
+    builder_->virtualGraph(tensor_id, ipu_index);
+    VLOG(10) << "set ipu_index= " << ipu_index
+             << " for op: " << op_desc->Type();
+    if (op_desc->HasAttr("ipu_stage")) {
+      auto ipu_stage = BOOST_GET_CONST(int, op_desc->GetAttr("ipu_stage"));
+      builder_->pipelineStage(tensor_id, ipu_stage);
+      VLOG(10) << "set ipu_stage= " << ipu_stage
+               << " for op: " << op_desc->Type();
+    }
+  }
+  VLOG(10) << "leave Compiler::SetIpuIndexStage";
+}
 
-#define ARG(Type, Name) , GetAttrAllowNull<Type>(#Name, op_desc)
-#define POPART_CONST_ARG(Name) , const PopartConstant& Name
-#define HOST_SIDE_CONST_ARG(Name) , const HostSideConstant& Name
-#define POPART_ATTRIB_VEC_ARG(Name)
-#define BODY_ARG(Name) NONE
+std::vector<int64_t> Compiler::GetTensorShape(const std::string& name) {
+  return builder_->getTensorShape(tensors_[name]);
+}
 
-  name_function_ = {
-#define OP_DECL(FuncName, OnnxImpl, Args)                    \
-  {#FuncName, [&](OpDesc* op_desc) {                         \
-     auto op_type = op_desc->Type();                         \
-     VLOG(1) << "build op:" << op_type << " args " << #Args; \
-     auto inputs = GetOpInputs(op_desc);                     \
-     auto output_names = op_desc->Output("__outputs__");     \
-     auto aiOnnxOpset1 = builder_->aiGraphcoreOpset1();      \
-     auto aiOnnxOpset = builder_->aiOnnxOpset11();           \
-     auto output_ids = OnnxImpl(inputs Args);               \
-     SetIpuIndexStage(output_ids, op_desc);                  \
-     InsertTensors(output_names, output_ids);                \
-   }},
-#include "paddle/fluid/framework/ipu/supported_ops_autogen.h"
-  };
+std::string Compiler::GetModelProto() { return builder_->getModelProto(); }
 
-#undef OP_DECL
-// #undef OP_DECL_NO_RETURN
-#undef BODY_ARG
-#undef POPART_ATTRIB_VEC_ARG
-#undef HOST_SIDE_CONST_ARG
-#undef POPART_CONST_ARG
-#undef ARG
-#undef NONE
-#undef STRING_VEC
-#undef STRING
-#undef BOOL
-#undef INT
-#undef FLOAT
-#undef FLOAT_VEC
-#undef INT_VEC
+void Compiler::SaveModelProto(const std::string& path) {
+  builder_->saveModelProto(path);
+}
 
-// self register ops
-#include "paddle/fluid/framework/ipu/supported_ops_custom.h"
-  name_function_.emplace("popart_reducemean", ReduceMeanHandler);
-  name_function_.emplace("popart_batchnormalization", BatchNormHandler);
-  name_function_.emplace("popart_constant", Constant);
-  name_function_.emplace("popart_nllloss", NllLoss);
-  name_function_.emplace("popart_groupnormalization", Groupnormalization);
+void Compiler::SaveModelProtoNoCheck(const std::string& path) {
+  auto proto = builder_->getModelProto();
+  std::ofstream onnxfile(path, std::ios_base::binary);
+  onnxfile.write(proto.data(), proto.size());
+  onnxfile.close();
 }
 
-void Compiler::LowerBody(const ir::Graph* graph) {
-  VLOG(10) << "enter Compiler::LowerBody";
-  // used for debug
-  for (auto elem : name_function_) {
-    VLOG(1) << "registered in map : " << elem.first << " second "
-            << &(elem.second);
-  }
-  auto nodes = paddle::framework::ir::TopologySortOperations(*graph);
-  for (auto* node : nodes) {
-    OpDesc* op = node->Op();
-    VLOG(1) << "node->type: " << op->Type();
-    PADDLE_ENFORCE_GT(
-        name_function_.count(op->Type()), 0,
-        platform::errors::NotFound(
-            "Do not found operator convert function, please make "
-            "sure it is registered in file \"supported_ops_autogen.h\" or "
-            "\"supported_ops_custom.h\""));
-    auto func = name_function_[op->Type()];
-    func(node->Op());
+std::vector<popart::TensorId> Compiler::GetOpInputs(const OpDesc* op) {
+  auto ins = op->Input("__inputs__");
+  std::vector<popart::TensorId> inputs;
+  for (const auto& in : ins) {
+    if (tensors_.find(in) != tensors_.end()) {
+      inputs.push_back(tensors_[in]);
+    } else {
+      inputs.push_back(in);
+    }
   }
-  VLOG(10) << "leave Compiler::LowerBody";
+  return inputs;
+}
+
+std::vector<std::string> Compiler::GetOpOutputs(const OpDesc* op) {
+  return op->Output("__outputs__");
 }
 
 }  // namespace ipu
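Taken together, compiler.cc now exposes a four-step lowering pipeline. A sketch of the intended call sequence (it mirrors IpuBackend::Compile below; the feed/fetch names are placeholders):

    // Sketch: driving the Compiler end to end.
    Compiler compiler;
    compiler.InitInputs(graph, {"image"});       // feeds -> builder inputs
    compiler.LowerWeights(graph, scope);         // persistable vars -> weights
    compiler.LowerBody(graph);                   // ops, in topological order
    compiler.InitOutputs({"loss"});              // fetches -> builder outputs
    auto onnx_proto = compiler.GetModelProto();  // serialized ONNX model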
diff --git a/paddle/fluid/framework/ipu/compiler.h b/paddle/fluid/framework/ipu/compiler.h
index 6f5c44b6b0ae6..edc45e1d0ea33 100644
--- a/paddle/fluid/framework/ipu/compiler.h
+++ b/paddle/fluid/framework/ipu/compiler.h
@@ -11,29 +11,13 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+
 #pragma once
-#include
+
 #include <popart/builder.hpp>
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "paddle/fluid/framework/ipu/ipu_strategy.h"
-#include "paddle/fluid/framework/ipu/ipu_utils.h"
 #include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/ir/node.h"
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/framework/scope.h"
 
 namespace paddle {
 namespace framework {
@@ -41,42 +25,45 @@ namespace ipu {
 
 class Compiler {
  public:
-  explicit Compiler(const IpuStrategy *ipu_strategy);
+  Compiler();
   ~Compiler();
+
+  void RegisterOpFunc();
+  void LowerBody(const ir::Graph *graph);
   void InitInputs(ir::Graph *graph, const std::vector<std::string> &feed_list);
   void InitOutputs(const std::vector<std::string> &fetch_list);
   void LowerWeights(const ir::Graph *graph, const Scope *scope_);
-  void RegisterOpFunc();
-  void LowerBody(const ir::Graph *graph);
-  std::vector<popart::TensorId> GetOpInputs(const OpDesc *op);
-
-  void InsertTensors(std::vector<std::string> output_names,
-                     std::vector<std::string> tensor_ids);
-  void InsertTensors(std::vector<std::string> output_names,
-                     std::string tensor_id);
+  void InsertTensors(const std::vector<std::string> &output_names,
+                     const std::vector<std::string> &tensor_ids);
+  void InsertTensors(const std::vector<std::string> &output_names,
+                     const std::string &tensor_id);
   void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
                         const OpDesc *op_desc);
-  void SetIpuIndexStage(const std::string &tensor_id,
-                        const OpDesc *op_desc);
+  void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
 
   std::vector<popart::TensorId> GetInputs() { return inputs_; }
   std::vector<popart::TensorId> GetOutputs() { return outputs_; }
   std::map<std::string, popart::TensorId> GetTensors() { return tensors_; }
-  std::vector<int64_t> GetTensorShape(std::string name) {
-    return builder_->getTensorShape(tensors_[name]);
-  }
-  std::string GetModelProto() { return builder_->getModelProto(); };
-  void SaveModelProto(std::string name) { builder_->saveModelProto(name); }
+  std::vector<int64_t> GetTensorShape(const std::string &name);
+
+  std::string GetModelProto();
+  void SaveModelProto(const std::string &path);
+  void SaveModelProtoNoCheck(const std::string &path);
+
+ private:
+  std::vector<popart::TensorId> GetOpInputs(const OpDesc *op);
+  std::vector<std::string> GetOpOutputs(const OpDesc *op);
 
  private:
-  std::map<std::string, popart::TensorId> tensors_;
   std::unique_ptr<popart::Builder> builder_;
-  const IpuStrategy *ipu_strategy_;
+
+  std::map<std::string, popart::TensorId> tensors_;
   std::vector<popart::TensorId> inputs_;
   std::vector<popart::TensorId> outputs_;
-  using Func = std::function<void(OpDesc *)>;
-  std::unordered_map<std::string, Func> name_function_;
+
+  using OpFunc = std::function<void(OpDesc *)>;
+  std::unordered_map<std::string, OpFunc> name_function_;
 };
 
 }  // namespace ipu
diff --git a/paddle/fluid/framework/ipu/ipu_backend.cc b/paddle/fluid/framework/ipu/ipu_backend.cc
index f11581ee1de66..dee5b384499ad 100644
--- a/paddle/fluid/framework/ipu/ipu_backend.cc
+++ b/paddle/fluid/framework/ipu/ipu_backend.cc
@@ -38,75 +38,83 @@ namespace ipu {
 
 std::shared_ptr<IpuBackend> IpuBackend::instance_ = nullptr;
 
-IpuBackend::IpuBackend() {}
+IpuBackend::IpuBackend() { compiler_ = std::make_shared<Compiler>(); }
+
+IpuBackend::~IpuBackend() {
+  if (instance_ == nullptr) {
+    return;
+  }
+
+  // detach device
+  if (curr_device_ != nullptr && curr_device_->isAttached()) {
+    curr_device_->detach();
+  }
+}
+
+std::shared_ptr<IpuBackend> IpuBackend::GetInstance() {
+  if (!instance_) {
+    instance_.reset(new IpuBackend());
+  }
+  return instance_;
+}
 
 void IpuBackend::Compile(ir::Graph* graph,
                          const std::vector<std::string>& feed_list,
                          const std::vector<std::string>& fetch_list) {
-  VLOG(1) << "-- in Compile --";
-  compiler_ = std::make_shared<Compiler>(ipu_strategy_);
+  VLOG(10) << "enter IpuBackend::Compile";
   compiler_->InitInputs(graph, feed_list);
   compiler_->LowerWeights(graph, scope_);
   compiler_->LowerBody(graph);
   compiler_->InitOutputs(fetch_list);
-  VLOG(1) << "-- fetch_list --";
-  for (const auto& fetch_name : fetch_list) {
-    VLOG(1) << fetch_name;
-  }
+  VLOG(10) << "leave IpuBackend::Compile";
 }
IpuBackend::Compile"; compiler_->InitInputs(graph, feed_list); compiler_->LowerWeights(graph, scope_); compiler_->LowerBody(graph); compiler_->InitOutputs(fetch_list); - VLOG(1) << "-- fetch_list --"; - for (const auto& fetch_name : fetch_list) { - VLOG(1) << fetch_name; - } + VLOG(10) << "leave IpuBackend::Compile"; } -std::unique_ptr IpuBackend::GetPopartOptimizer() { - // TODO(xiaobingw): change type_ to enum - PADDLE_ENFORCE_NE( - optimizer_.type_, "", - platform::errors::InvalidArgument("Optimizer type have not been set.")); - if (optimizer_.type_ == "sgd") { - auto optimizer = std::make_unique( - popart::OptimizerValue(GetLRFromScope(), false), - popart::OptimizerValue(popart::SGD::getUnsetWeightDecay()), - popart::OptimizerValue(popart::SGD::getUnsetMomentum()), - popart::OptimizerValue(popart::SGD::getUnsetDampening()), - popart::OptimizerValue(popart::SGD::getUnsetVelocityScaling()), - popart::OptimizerValue(popart::SGD::getUnsetLossScaling())); - return optimizer; - } else if (optimizer_.type_ == "adam") { - auto optimizer = std::make_unique( - popart::OptimizerValue(GetLRFromScope(), false), - popart::OptimizerValue(popart::Adam::getUnsetWeightDecay()), - popart::OptimizerValue(GetOptimizerAttr("beta1"), false), - popart::OptimizerValue(GetOptimizerAttr("beta2"), false), - popart::OptimizerValue(GetOptimizerAttr("epsilon"), false), - popart::OptimizerValue(popart::Adam::getUnsetLossScaling()), - popart::AdamMode::Adam, popart::WeightDecayMode::Decay, - popart::DataType::FLOAT, popart::DataType::FLOAT, - popart::DataType::FLOAT); - return optimizer; - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Optimizer %s is not implemented now.", optimizer_.type_)); +void IpuBackend::Run(const std::vector& inputs, + const std::vector& outputs) { + if (!is_prepared_) { + Prepare(); + is_prepared_ = true; } -} -std::vector IpuBackend::GetTensorShape(const std::string& var_name) { - auto oshape = compiler_->GetTensorShape(var_name); - oshape.insert(oshape.begin(), ipu_strategy_->batches_per_step); - return oshape; + std::map popart_inputs; + std::map input_wrappers; + auto input_tensors = compiler_->GetInputs(); + for (size_t i = 0; i < inputs.size(); i++) { + auto tensor_id = input_tensors[i]; + auto tensor = const_cast(inputs[i]); + input_wrappers.emplace(tensor_id, PaddleIArray(tensor)); + popart_inputs.emplace(tensor_id, input_wrappers.at(tensor_id)); + } + + std::map popart_anchors; + std::map anchor_wrappers; + auto output_tensors = compiler_->GetOutputs(); + for (size_t i = 0; i < outputs.size(); i++) { + auto tensor_id = output_tensors[i]; + auto tensor = const_cast(outputs[i]); + anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor)); + popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id)); + } + + if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) { + VLOG(10) << "Update optimizer learning rate..."; + auto popart_optimizer = GetPopartOptimizer(); + auto session = dynamic_cast(session_.get()); + session->updateOptimizerFromHost(popart_optimizer.get()); + } + + popart::StepIO stepio(popart_inputs, popart_anchors); + VLOG(10) << "Running..."; + session_->run(stepio); + VLOG(10) << "Running...done"; } void IpuBackend::Prepare() { - VLOG(1) << "Get ModelProto ...\n"; + VLOG(10) << "Get ModelProto ...\n"; auto proto = compiler_->GetModelProto(); - - // for onnx graph debug - // std::ofstream onnxfile("paddle_model_no_check.onnx", - // std::ios_base::binary); - // onnxfile.write(proto.data(), proto.size()); - // onnxfile.close(); - - VLOG(1) << "Save Model 
-
-  VLOG(1) << "Constructing DataFlow\n";
+  VLOG(10) << "Constructing DataFlow\n";
   std::vector<popart::TensorId> anchor_ids;
   for (popart::TensorId item : compiler_->GetOutputs()) {
     anchor_ids.push_back(item);
@@ -119,7 +127,7 @@ void IpuBackend::Prepare() {
                         "IpuBackend::AttachDevice(id) first."));
 
   if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
-    VLOG(1) << "Creating TrainingSession from Onnx Model...";
+    VLOG(10) << "Creating TrainingSession from Onnx Model...";
     auto popart_optimizer = GetPopartOptimizer();
     auto tensors = compiler_->GetTensors();
     auto it = tensors.find(optimizer_.loss_);
@@ -132,62 +140,71 @@ void IpuBackend::Prepare() {
         popart::InputShapeInfo(), ipu_strategy_->popart_options_,
         popart::Patterns(popart::PatternsLevel::Default));
   } else {
-    VLOG(1) << "Creating InferenceSession from Onnx Model...";
+    VLOG(10) << "Creating InferenceSession from Onnx Model...";
     session_ = popart::InferenceSession::createFromOnnxModel(
         proto, dataFlow, curr_device_, popart::InputShapeInfo(),
         ipu_strategy_->popart_options_,
         popart::Patterns(popart::PatternsLevel::Default));
   }
-  VLOG(1) << "Creating session from Onnx Model...done";
+  VLOG(10) << "Creating session from Onnx Model...done";
 
-  VLOG(1) << "Preparing session device...";
+  VLOG(10) << "Preparing session device...";
   session_->prepareDevice();
-  VLOG(1) << "Preparing session device...done";
+  VLOG(10) << "Preparing session device...done";
 
-  VLOG(1) << "Copy weights from host to device...";
+  VLOG(10) << "Copy weights from host to device...";
   session_->weightsFromHost();
-  VLOG(1) << "Copy weights from host to device...done";
+  VLOG(10) << "Copy weights from host to device...done";
 }
 
-void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
-                     const std::vector<const Tensor*>& outputs) {
-  if (!is_prepared_) {
-    Prepare();
-    is_prepared_ = true;
-  }
-
-  std::map<popart::TensorId, popart::IArray&> popart_inputs;
-  std::map<popart::TensorId, PaddleIArray> input_wrappers;
-  auto input_tensors = compiler_->GetInputs();
-  for (size_t i = 0; i < inputs.size(); i++) {
-    auto tensor_id = input_tensors[i];
-    auto tensor = const_cast<Tensor*>(inputs[i]);
-    input_wrappers.emplace(tensor_id, PaddleIArray(tensor));
-    popart_inputs.emplace(tensor_id, input_wrappers.at(tensor_id));
-  }
+std::vector<int64_t> IpuBackend::GetTensorShape(const std::string& var_name) {
+  auto oshape = compiler_->GetTensorShape(var_name);
+  oshape.insert(oshape.begin(), ipu_strategy_->batches_per_step);
+  return oshape;
+}
 
-  std::map<popart::TensorId, popart::IArray&> popart_anchors;
-  std::map<popart::TensorId, PaddleIArray> anchor_wrappers;
-  auto output_tensors = compiler_->GetOutputs();
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto tensor_id = output_tensors[i];
-    auto tensor = const_cast<Tensor*>(outputs[i]);
-    anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor));
-    popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id));
+std::unique_ptr<popart::Optimizer> IpuBackend::GetPopartOptimizer() {
+  // TODO(xiaobingw): change type_ to enum
+  PADDLE_ENFORCE_NE(
+      optimizer_.type_, "",
+      platform::errors::InvalidArgument("Optimizer type has not been set."));
+  if (optimizer_.type_ == "sgd") {
+    auto optimizer = std::make_unique<popart::SGD>(
+        popart::OptimizerValue(GetLRFromScope(), false),
+        popart::OptimizerValue(popart::SGD::getUnsetWeightDecay()),
+        popart::OptimizerValue(popart::SGD::getUnsetMomentum()),
+        popart::OptimizerValue(popart::SGD::getUnsetDampening()),
+        popart::OptimizerValue(popart::SGD::getUnsetVelocityScaling()),
+        popart::OptimizerValue(popart::SGD::getUnsetLossScaling()));
+    return optimizer;
+  } else if (optimizer_.type_ == "adam") {
+    auto optimizer = std::make_unique<popart::Adam>(
+        popart::OptimizerValue(GetLRFromScope(), false),
+        popart::OptimizerValue(popart::Adam::getUnsetWeightDecay()),
+        popart::OptimizerValue(GetOptimizerAttr("beta1"), false),
+        popart::OptimizerValue(GetOptimizerAttr("beta2"), false),
+        popart::OptimizerValue(GetOptimizerAttr("epsilon"), false),
+        popart::OptimizerValue(popart::Adam::getUnsetLossScaling()),
+        popart::AdamMode::Adam, popart::WeightDecayMode::Decay,
+        popart::DataType::FLOAT, popart::DataType::FLOAT,
+        popart::DataType::FLOAT);
+    return optimizer;
+  } else {
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Optimizer %s is not implemented now.", optimizer_.type_));
   }
+}
 
-  if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
-    VLOG(1) << "Update optimizer learning rate...";
-    auto popart_optimizer = GetPopartOptimizer();
-    auto session = dynamic_cast<popart::TrainingSession*>(session_.get());
-    session->updateOptimizerFromHost(popart_optimizer.get());
+float IpuBackend::GetOptimizerAttr(const std::string& attr,
+                                   float default_value) {
+  if (optimizer_.attrs_.count(attr) == 0) {
+    return default_value;
   }
+  return optimizer_.attrs_.at(attr);
+}
 
-  popart::StepIO stepio(popart_inputs, popart_anchors);
-
-  VLOG(1) << "Running...";
-  session_->run(stepio);
-  VLOG(1) << "Running...done";
+void IpuBackend::SetOptimizerAttr(const std::string& attr, float value) {
+  optimizer_.attrs_[attr] = value;
 }
 
 float IpuBackend::GetLRFromScope() {
@@ -201,17 +218,8 @@ float IpuBackend::GetLRFromScope() {
   return tensor.data<float>()[0];
 }
 
-// ipu_num_ must be pow(2,n);
-int IpuBackend::UpperIpuNum() {
-  PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
-                    platform::errors::Unavailable(
-                        "The ipu num get is wrong, please make sure the "
-                        "sharding or pipline parameter is right."));
-  int i = 0;
-  while (pow(2, i) < ipu_strategy_->num_ipus) {
-    i++;
-  }
-  return pow(2, i);
+void IpuBackend::SetIpuStrategy(const IpuStrategy& strategy) {
+  ipu_strategy_ = &strategy;
 }
 
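GetPopartOptimizer reads everything from the Optimizer struct, so the Paddle side has to populate it before the session is prepared. A hedged sketch of that handshake (the attribute values and variable names are placeholders):

    // Sketch: configuring the state consumed by GetPopartOptimizer().
    auto backend = IpuBackend::GetInstance();
    backend->SetOptimizerType("adam");
    backend->SetOptimizerAttr("beta1", 0.9f);
    backend->SetOptimizerAttr("beta2", 0.999f);
    backend->SetOptimizerAttr("epsilon", 1e-6f);
    backend->SetLoss("mean_0.tmp_0");          // hypothetical loss var name
    backend->SetLRVarName("learning_rate_0");  // hypothetical LR var name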
- VLOG(1) << "comile ipu id = " << id; + VLOG(10) << "comile ipu id = " << id; bool ipu_model = GetBoolEnv("POPLAR_IPUMODEL"); if (ipu_model) { return; @@ -287,17 +295,20 @@ void IpuBackend::AttachDevice(int id) { "Can't attach IPU, ipu_num = %d.", UpperIpuNum())); } -IpuBackend::~IpuBackend() { - if (instance_ == nullptr) { - return; - } +bool IpuBackend::DeviceIsAttached() { return curr_device_ != nullptr; } - // detach device - if (curr_device_ != nullptr && curr_device_->isAttached()) { - curr_device_->detach(); +// ipu_num_ must be pow(2,n); +int IpuBackend::UpperIpuNum() { + PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0, + platform::errors::Unavailable( + "The ipu num get is wrong, please make sure the " + "sharding or pipline parameter is right.")); + int i = 0; + while (pow(2, i) < ipu_strategy_->num_ipus) { + i++; } + return pow(2, i); } -bool IpuBackend::DeviceIsAttached() { return curr_device_ != nullptr; } } // namespace ipu } // namespace framework diff --git a/paddle/fluid/framework/ipu/ipu_backend.h b/paddle/fluid/framework/ipu/ipu_backend.h index 13721d3d6b12f..7b787b568267d 100644 --- a/paddle/fluid/framework/ipu/ipu_backend.h +++ b/paddle/fluid/framework/ipu/ipu_backend.h @@ -52,74 +52,50 @@ class IpuBackend { IpuBackend(); ~IpuBackend(); + static std::shared_ptr GetInstance(); + void Compile(ir::Graph *graph, const std::vector &feed_list, const std::vector &fetch_list); - void Run(const std::vector &inputs, const std::vector &outputs); - std::string GetOptimizerType() { return optimizer_.type_; } - - void SetOptimizerType(const std::string &type) { optimizer_.type_ = type; } - - float GetOptimizerAttr(const std::string &name, float default_value = 0.0f) { - if (optimizer_.attrs_.count(name) == 0) { - return default_value; - } - return optimizer_.attrs_.at(name); - } - - void SetOptimizerAttr(const std::string &attr, float value) { - optimizer_.attrs_[attr] = value; - } - - void SetLoss(const std::string &loss) { optimizer_.loss_ = loss; } - - std::unique_ptr GetPopartOptimizer(); - std::vector GetTensorShape(const std::string &var_name); - - // SetScope, so we can get model parameters from scope void SetScope(const Scope &scope) { scope_ = &scope; } + // Optimizer + std::unique_ptr GetPopartOptimizer(); + std::string GetOptimizerType() { return optimizer_.type_; } + void SetOptimizerType(const std::string &type) { optimizer_.type_ = type; } + float GetOptimizerAttr(const std::string &attr, float default_value = 0.0f); + void SetOptimizerAttr(const std::string &attr, float value); + void SetLoss(const std::string &loss) { optimizer_.loss_ = loss; } void SetLRVarName(const std::string &name) { optimizer_.lr_var_name_ = name; } - // get fixed and adjustable learning rate from scope - float GetLRFromScope(); - - void SetIpuStrategy(const IpuStrategy &strategy) { - ipu_strategy_ = &strategy; - } - int UpperIpuNum(); + // IpuStrategy + void SetIpuStrategy(const IpuStrategy &strategy); size_t GetNumDevices(); std::vector GetDeviceIds(); Device GetDevice(int id); void AttachDevice(int id); bool DeviceIsAttached(); - static std::shared_ptr GetInstance() { - if (NULL == instance_) { - instance_.reset(new IpuBackend()); - } - return instance_; - } - private: void Prepare(); - void LowerWeights(const ir::Graph *); - void LowerBody(const ir::Graph *); - std::vector GetOpInputs(const OpDesc *op); + float GetLRFromScope(); + int UpperIpuNum(); private: + static std::shared_ptr instance_; + std::shared_ptr compiler_; + Optimizer optimizer_; + std::unique_ptr session_; + std::shared_ptr 
diff --git a/paddle/fluid/framework/ipu/popart_canonicalization/nn_ops.cc b/paddle/fluid/framework/ipu/popart_canonicalization/nn_ops.cc
index 0559841789073..5af57621d8fcb 100644
--- a/paddle/fluid/framework/ipu/popart_canonicalization/nn_ops.cc
+++ b/paddle/fluid/framework/ipu/popart_canonicalization/nn_ops.cc
@@ -53,6 +53,7 @@ Node *conv2d_handler(Graph *graph, Node *node) {
 }
 
 Node *batch_norm_handler(Graph *graph, Node *node) {
+  // TODO(alleng) differs between training & inference
   auto *op = node->Op();
   std::vector<Node *> inputs;
   inputs.push_back(GetInputNode("X", node));
@@ -66,7 +67,7 @@ Node *batch_norm_handler(Graph *graph, Node *node) {
   outputs.push_back(GetOutputNode("VarianceOut", node));
   outputs.push_back(GetOutputNode("SavedMean", node));
   outputs.push_back(GetOutputNode("SavedVariance", node));
-  outputs.push_back(GetOutputNode("ReserveSpace", node));
+  // outputs.push_back(GetOutputNode("ReserveSpace", node));
   auto momentum = BOOST_GET_CONST(float, op->GetAttr("momentum"));
   auto epsilon = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
   // data_layout
diff --git a/paddle/fluid/framework/ipu/supported_ops_autogen.h b/paddle/fluid/framework/ipu/supported_ops_autogen.h
index 41411df5e56bc..dc3fb94add049 100644
--- a/paddle/fluid/framework/ipu/supported_ops_autogen.h
+++ b/paddle/fluid/framework/ipu/supported_ops_autogen.h
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// TODO(alleng) add `//clang-format off` in python
+// TODO(alleng) add `//clang-format off` in python, add `//NOLINT` for each op
 // clang-format off
 // Ops from AiGraphcoreOpset1
 OP_DECL(popart_gelu,aiOnnxOpset1.gelu, NONE)
-// OP_DECL(popart_groupnormalization, aiOnnxOpset1.groupnormalization, ARG(INT,num_groups) ARG(FLOAT,epsilon))
+OP_DECL(popart_groupnormalization, aiOnnxOpset1.groupnormalization, ARG(INT,num_groups) ARG(FLOAT,epsilon))
 // OP_DECL(popart_reshape,aiOnnxOpset.reshape, NONE)
 // Ops from AiOnnxOpset10
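With the groupnormalization row re-enabled, the generated handler pulls num_groups and epsilon off the OpDesc through the ARG/GetAttrAllowNull machinery defined in RegisterOpFunc. Roughly, the call it ends up making is (illustrative expansion, not the literal preprocessor output):

    // Sketch: what OP_DECL(popart_groupnormalization, ...) builds, given
    // ARG(INT,num_groups) ARG(FLOAT,epsilon).
    auto output_ids = aiOnnxOpset1.groupnormalization(
        inputs,
        GetAttrAllowNull<std::int64_t>("num_groups", op_desc),
        GetAttrAllowNull<float>("epsilon", op_desc));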