Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Inference Tensorrt] Add attr for trt engine and handle the input seq problem for ernie var len. #33575

Merged
merged 5 commits into from
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
auto word_id_name = op_desc.Input("WordId").front();
auto pos_id_name = op_desc.Input("PosId").front();
engine_->Set("ernie_pos_name", new std::string(pos_id_name));

auto sent_id_name = op_desc.Input("SentId").front();
auto word_emb_name = op_desc.Input("WordEmbedding").front();
auto pos_emb_name = op_desc.Input("PosEmbedding").front();
Expand Down
13 changes: 10 additions & 3 deletions paddle/fluid/inference/tensorrt/convert/slice_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,16 @@ class SliceOpConverter : public OpConverter {
std::vector<nvinfer1::ITensor*> plugin_inputs;
// plugin_inputs.emplace_back(trans_layer->getOutput(0));
plugin_inputs.emplace_back(input);
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(2)->getName())); // cu_seqlens,
// eval_placeholder_2

std::string pos_name;
if (engine_->Has("ernie_pos_name")) {
pos_name = engine_->Get<std::string>("ernie_pos_name");
} else {
// hard code for compatibility
pos_name = engine_->network()->getInput(2)->getName();
}
plugin_inputs.emplace_back(
engine_->GetITensor(pos_name)); // cu_seqlens, eval_placeholder_2

// bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SpecialSlicePluginDynamic* plugin =
Expand Down
89 changes: 88 additions & 1 deletion paddle/fluid/inference/tensorrt/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,15 @@ class TensorRTEngine {
dy::initLibNvInferPlugins(&logger, "");
}

~TensorRTEngine() {}
~TensorRTEngine() {
  // Run the registered deleter of every attribute that has one. Attributes
  // stored without a deleter (see SetNotOwned) are simply dropped from the
  // map; their pointees are left untouched.
  for (const auto& entry : attrs_) {
    const auto deleter = attr_dels_.find(entry.first);
    if (deleter == attr_dels_.end()) {
      continue;
    }
    deleter->second();
  }
  attrs_.clear();
  attr_dels_.clear();
}

// Add an input and set its name, data type and dimension.
nvinfer1::ITensor* DeclareInput(const std::string& name,
Expand Down Expand Up @@ -386,6 +394,82 @@ class TensorRTEngine {
}
#endif

bool Has(const std::string& attr_name) const {
  // An attribute counts as present as soon as it is registered in attrs_,
  // regardless of whether a deleter was registered for it.
  return attrs_.find(attr_name) != attrs_.end();
}

void Erase(const std::string& attr_name) {
  // Removes the attribute named `attr_name`; does nothing when it is absent.
  const auto stored = attrs_.find(attr_name);
  if (stored == attrs_.end()) {
    return;
  }

  // If a deleter was registered for this attribute, run it and drop it
  // before forgetting the attribute itself.
  const auto deleter = attr_dels_.find(attr_name);
  if (deleter != attr_dels_.end()) {
    deleter->second();
    attr_dels_.erase(deleter);
  }

  attrs_.erase(stored);
}

// Set a pointer to the attribute. Engine takes ownership of the attribute.
template <typename AttrType>
void Set(const std::string& attr_name, AttrType* attr) {
if (attrs_.count(attr_name) == 0) {
PADDLE_ENFORCE_EQ(
attrs_.count(attr_name), 0,
platform::errors::AlreadyExists(
"Attribute %s already set in trt engine.", attr_name));
} else {
VLOG(3) << "Setting the attribute " << attr_name << " for trt engine "
<< this;
}
attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(3) << "deleting " << attr_name;
delete attr;
};
}

// Set a pointer to the attribute. Engine doesn't take ownership. Caller
// should delete the attribute.
template <typename AttrType>
void SetNotOwned(const std::string& attr_name, AttrType* attr) {
PADDLE_ENFORCE_EQ(
attrs_.count(attr_name), 0,
platform::errors::AlreadyExists(
"Attribute %s already set in trt engine.", attr_name));
attrs_[attr_name] = attr;
}

// Get a reference to the attribute previously set.
template <typename AttrType>
AttrType& Get(const std::string& attr_name) const {
PADDLE_ENFORCE_NE(attrs_.find(attr_name), attrs_.end(),
platform::errors::InvalidArgument(
"Attribute %s not found in trt engine.", attr_name));
try {
return *boost::any_cast<AttrType*>(attrs_.at(attr_name));
} catch (boost::bad_any_cast&) {
auto TypeToString = [](const std::type_info& info) -> std::string {
if (std::type_index(info) == std::type_index(typeid(bool*))) {
return "bool";
} else if (std::type_index(info) == std::type_index(typeid(int*))) {
return "int";
} else if (std::type_index(info) ==
std::type_index(typeid(const int*))) {
return "const int";
} else if (std::type_index(info) ==
std::type_index(typeid(std::string*))) {
return "std::string";
}
return info.name();
};

PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid type for attritube %s, expected: %s, actual: %s.", attr_name,
TypeToString(typeid(AttrType*)),
TypeToString(attrs_.at(attr_name).type())));
}
}

private:
// Each ICudaEngine object is bound to a specific GPU when it is instantiated,
// ensure that the thread is associated with the correct device by calling
Expand Down Expand Up @@ -441,6 +525,9 @@ class TensorRTEngine {
infer_ptr<nvinfer1::IHostMemory> ihost_memory_;
std::unordered_map<nvinfer1::ITensor*, float> quant_dynamic_range_;

// Named engine attributes. Each value is a boost::any wrapping the raw
// pointer handed to Set()/SetNotOwned(); Get<T>() casts it back to T*.
std::unordered_map<std::string, boost::any> attrs_;
// Deleters for the attributes registered through Set(); invoked by Erase()
// and by the destructor. Attributes added via SetNotOwned() have no entry.
std::unordered_map<std::string, std::function<void(void)>> attr_dels_;

// For dynamic shape
bool with_dynamic_shape_{false};
infer_ptr<nvinfer1::INetworkDefinition> infer_networkv2_;
Expand Down