Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TRT: reduce_mean op supported #34204

Merged
merged 15 commits on
Jul 21, 2021
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,7 @@ USE_TRT_CONVERTER(nearest_interp);
USE_TRT_CONVERTER(reshape);
USE_TRT_CONVERTER(reduce_sum);
USE_TRT_CONVERTER(gather_nd);
USE_TRT_CONVERTER(reduce_mean);
#endif

namespace paddle_infer {
Expand Down
33 changes: 25 additions & 8 deletions paddle/fluid/inference/tensorrt/convert/reduce_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,18 @@ namespace paddle {
namespace inference {
namespace tensorrt {

// Shared converter that maps paddle reduce_* ops onto a TensorRT IReduceLayer.
// Subclasses set op_type in their constructor to select the reduction kind.
class ReduceOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer";
    framework::OpDesc op_desc(op, nullptr);

    // Select the TensorRT reduction. Default to kSUM so reduce_type is never
    // read uninitialized if a subclass registers an unexpected op_type (the
    // original if/else-if chain left it uninitialized in that case).
    nvinfer1::ReduceOperation reduce_type = nvinfer1::ReduceOperation::kSUM;
    if (op_type == "reduce_mean") {
      reduce_type = nvinfer1::ReduceOperation::kAVG;
    }

    auto* x = engine_->GetITensor(op_desc.Input("X").front());
    nvinfer1::Dims input_shape = x->getDimensions();
    // NOTE(review): the following attribute reads are reconstructed from a
    // collapsed diff region — confirm against the full reduce_op.cc.
    int input_dims = input_shape.nbDims;
    bool keep_dim = BOOST_GET_CONST(bool, op_desc.GetAttr("keep_dim"));
    std::vector<int32_t> dim =
        BOOST_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("dim"));
    bool reduce_all = BOOST_GET_CONST(bool, op_desc.GetAttr("reduce_all"));

    nvinfer1::IReduceLayer* layer = nullptr;
    if (reduce_all) {
      // Reduce over every axis: set one bit per input dimension in the mask.
      uint32_t reduce_dim = 0;
      for (int i = 0; i < input_dims; ++i) {
        reduce_dim |= 1 << i;
      }
      layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, reduce_dim,
                                   keep_dim);
    } else {
      // Convert paddle's axis list into TensorRT's bitmask form; negative
      // axes count from the end, as in paddle.
      auto CvtToBitMask = [&](const std::vector<int32_t>& dims) -> uint32_t {
        uint32_t res = 0;
        for (auto d : dims) {
          if (d < 0) {
            res |= 1 << (d + input_dims);
          } else {
            res |= 1 << d;
          }
        }
        return res;
      };
      layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type,
                                   CvtToBitMask(dim), keep_dim);
    }

    auto output_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
  }

 protected:
  // Paddle op name ("reduce_sum" / "reduce_mean"); set by subclasses.
  std::string op_type;
};

class ReduceSumOpConverter : public ReduceOpConverter {
public:
ReduceSumOpConverter() { op_type = "reduce_sum"; }
};

class ReduceMeanOpConverter : public ReduceOpConverter {
public:
ReduceMeanOpConverter() { op_type = "reduce_mean"; }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(reduce_sum, ReduceSumOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter);
11 changes: 4 additions & 7 deletions paddle/fluid/inference/tensorrt/op_teller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp",
"anchor_generator",
"reduce_sum",
"reduce_mean",
};
};

Expand Down Expand Up @@ -709,15 +710,11 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (!with_dynamic_shape && shape[0] == -1) return false;
}

if (op_type == "reduce_sum") {
if (!with_dynamic_shape) {
VLOG(3) << "the reduce_sum does not support static shape yet";
return false;
}

if (op_type == "reduce_sum" || op_type == "reduce_mean") {
if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") &&
desc.HasAttr("reduce_all"))) {
VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or "
VLOG(3) << "the " << op_type
<< " does not have attr (keep_dim or dim or "
"reduce_all)";
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
#set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200)
set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45)
set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60)
endif()
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig


class TRTReduceMeanTest(InferencePassTest):
    def setUp(self):
        # Program under test: reduce_mean over axes (2, -1) with keep_dim,
        # followed by batch_norm, so the TRT subgraph pass has a graph to fuse.
        with fluid.program_guard(self.main_program, self.startup_program):
            input_tensor = fluid.data(
                name="data", shape=[-1, 3, 224, 224], dtype="float32")
            mean_out = fluid.layers.reduce_mean(
                input_tensor, dim=[2, -1], keep_dim=True)
            net_out = fluid.layers.batch_norm(mean_out, is_test=True)

        self.fetch_list = [net_out]
        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        # workspace 1GB, max_batch 32, min_subgraph 1, FP32, no serialization,
        # no calibration.
        self.trt_parameters = TRTReduceMeanTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        # Dynamic-shape range for the "data" input: min/max/opt shapes.
        self.dynamic_shape_params = TRTReduceMeanTest.DynamicShapeParam(
            {'data': [1, 3, 224, 224]}, {'data': [3, 3, 224, 224]},
            {'data': [3, 3, 224, 224]}, False)

    def test_check_output(self):
        # Only meaningful on CUDA builds — TensorRT needs a GPU.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


class TRTReduceMeanAllTest(InferencePassTest):
    def setUp(self):
        # Same as TRTReduceMeanTest but with no dim attr, i.e. reduce over
        # all axes (reduce_all=True path in the converter).
        with fluid.program_guard(self.main_program, self.startup_program):
            input_tensor = fluid.data(
                name="data", shape=[-1, 3, 224, 224], dtype="float32")
            mean_out = fluid.layers.reduce_mean(input_tensor, keep_dim=True)
            net_out = fluid.layers.batch_norm(mean_out, is_test=True)

        self.fetch_list = [net_out]
        self.feeds = {
            "data": np.random.random([3, 3, 224, 224]).astype("float32"),
        }
        self.enable_trt = True
        # workspace 1GB, max_batch 32, min_subgraph 1, FP32, no serialization,
        # no calibration.
        self.trt_parameters = TRTReduceMeanAllTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        # Dynamic-shape range for the "data" input: min/max/opt shapes.
        self.dynamic_shape_params = TRTReduceMeanAllTest.DynamicShapeParam(
            {'data': [1, 3, 224, 224]}, {'data': [3, 3, 224, 224]},
            {'data': [3, 3, 224, 224]}, False)

    def test_check_output(self):
        # Only meaningful on CUDA builds — TensorRT needs a GPU.
        if core.is_compiled_with_cuda():
            self.check_output_with_option(True, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


# Run the TRT reduce_mean inference tests when executed directly.
if __name__ == "__main__":
    unittest.main()