Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cherry pick #42255 (fuse conv + bn in QAT) and #42378 (support skip_op_list in PTQ) #43301

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,27 @@ def forward(self, input):
return input


def fuse_conv_bn(model):
    """Fuse every Conv2D sublayer with the BatchNorm2D sublayer that follows it.

    The sublayers are visited in the order produced by
    ``model.named_sublayers()``. Each BatchNorm2D is paired with the most
    recent Conv2D seen before it, and the collected ``[conv_name, bn_name]``
    pairs are handed to ``fuse_layers``.

    Args:
        model: The layer whose conv + bn pairs should be fused. It is
            switched to eval mode while fusing and switched back to train
            mode afterwards if it was training on entry.

    Returns:
        Whatever ``fuse_layers(model, fuse_list)`` returns. Callers should
        use this value rather than assume ``model`` was changed in place.
        NOTE(review): ``fuse_layers`` is called with its default
        ``inplace=False``; confirm whether it returns a copy.
    """
    was_training = model.training
    if was_training:
        model.eval()

    fuse_list = []
    pending_conv = None
    for name, layer in model.named_sublayers():
        if isinstance(layer, nn.Conv2D):
            # A later conv replaces an earlier one that never met a bn.
            pending_conv = name
        elif isinstance(layer, nn.BatchNorm2D) and pending_conv:
            # Only pair a bn with a conv that precedes it; a bn seen before
            # any conv must not be matched with a conv that comes later.
            fuse_list.append([pending_conv, name])
            pending_conv = None

    fused_model = fuse_layers(model, fuse_list)

    if was_training:
        # Restore the mode on the caller's model as well as on the result,
        # since the two may be different objects.
        model.train()
        if fused_model is not model:
            fused_model.train()
    return fused_model


def fuse_layers(model, layers_to_fuse, inplace=False):
'''
fuse layers in layers_to_fuse
Expand Down
10 changes: 10 additions & 0 deletions python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import warnings

import paddle
import paddle.nn as nn
import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid import dygraph, core, framework, unique_name
from paddle.fluid.framework import IrGraph
Expand All @@ -32,6 +33,7 @@
from paddle.fluid.log_helper import get_logger
from .. import quantization_pass
from . import utils
from . import fuse_utils

__all__ = ['ImperativeQuantAware']

Expand All @@ -52,6 +54,7 @@ def __init__(
weight_bits=8,
activation_bits=8,
moving_rate=0.9,
fuse_conv_bn=False,
weight_preprocess_layer=None,
act_preprocess_layer=None,
weight_quantize_layer=None,
Expand All @@ -76,6 +79,7 @@ def __init__(
activation_bits(int): quantization bit number for activations.
moving_rate(float): the parameter for 'moving_average_abs_max'
quantization.
fuse_conv_bn(bool): Whether to fuse conv and bn, default is False.
weight_preprocess_layer(paddle.nn.Layer, optional): A paddle
Layer that defines how to preprocess weight before quantization.
Using this can quickly test if user's preprocess method works
Expand Down Expand Up @@ -188,6 +192,7 @@ def forward(self, inputs):
model_path="./imperative_model_qat")
"""
super(ImperativeQuantAware, self).__init__()
self.fuse_conv_bn = fuse_conv_bn

kwargs = {
"quantizable_layer_type": quantizable_layer_type,
Expand Down Expand Up @@ -256,8 +261,13 @@ def forward(self, inputs):
"""
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

if self.fuse_conv_bn:
fuse_utils.fuse_conv_bn(model)

self._quantize_inputs.apply(model)
self._quantize_outputs.apply(model)
return model

def save_quantized_model(self, layer, path, input_spec=None, **config):
self._quantize_outputs.save_quantized_model(layer, path, input_spec,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def __init__(self,
onnx_format=False,
optimize_model=False,
is_use_cache_file=False,
skip_tensor_list=None,
cache_dir=None):
'''
Constructor.
Expand Down Expand Up @@ -198,6 +199,7 @@ def __init__(self,
the model accuracy is usually higher when it is 'channel_wise_abs_max'.
onnx_format(bool): Whether to export the quantized model with format of ONNX.
Default is False.
skip_tensor_list(list, optional): Names of the tensors to skip during quantization. Default is None.
optimize_model(bool, optional): If set optimize_model as True, it applies
some passes to the model before quantization, and it supports
`conv2d/depthwise_conv2d + bn` pass so far. Some targets require the
Expand Down Expand Up @@ -301,6 +303,7 @@ def __init__(self,
self._activation_quantize_type = activation_quantize_type
self._weight_quantize_type = weight_quantize_type
self._onnx_format = onnx_format
self._skip_tensor_list = skip_tensor_list
self._is_full_quantize = is_full_quantize
if is_full_quantize:
self._quantizable_op_type = self._support_quantize_op_type
Expand Down Expand Up @@ -547,6 +550,12 @@ def collect_var_name(var_name_list, persistable_var_names, op_type):
persistable_var_names = _all_persistable_var_names(self._program)
for block_id in range(len(self._program.blocks)):
for op in self._program.blocks[block_id].ops:
# skip quantization for ops that read a tensor listed in self._skip_tensor_list
if self._skip_tensor_list is not None:
for inp_name in utils._get_op_input_var_names(op):
if inp_name in self._skip_tensor_list:
op._set_attr("op_namescope", "skip_quant")

op_type = op.type
if self._is_full_quantize and \
op_type not in self._quantizable_op_type:
Expand Down
1 change: 1 addition & 0 deletions python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 200)
set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat_fuse PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat_user_defined PROPERTIES TIMEOUT 200)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,15 @@ def set_vars(self):
self.onnx_format = False
self.check_export_model_accuracy = True
self.diff_threshold = 0.01
self.fuse_conv_bn = False

def func_qat(self):
self.set_vars()

imperative_qat = ImperativeQuantAware(
weight_quantize_type=self.weight_quantize_type,
activation_quantize_type=self.activation_quantize_type)
activation_quantize_type=self.activation_quantize_type,
fuse_conv_bn=self.fuse_conv_bn)

with fluid.dygraph.guard():
# For CI coverage
Expand Down Expand Up @@ -214,6 +216,7 @@ def set_vars(self):
self.activation_quantize_type = 'moving_average_abs_max'
self.onnx_format = True
self.diff_threshold = 0.025
self.fuse_conv_bn = False


if __name__ == '__main__':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def set_vars(self):
self.activation_quantize_type = 'moving_average_abs_max'
self.diff_threshold = 0.01
self.onnx_format = False
self.fuse_conv_bn = False
print('weight_quantize_type', self.weight_quantize_type)


Expand All @@ -52,6 +53,7 @@ def set_vars(self):
self.activation_quantize_type = 'moving_average_abs_max'
self.onnx_format = True
self.diff_threshold = 0.025
self.fuse_conv_bn = False
print('weight_quantize_type', self.weight_quantize_type)


Expand Down
50 changes: 50 additions & 0 deletions python/paddle/fluid/contrib/slim/tests/test_imperative_qat_fuse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.

from __future__ import print_function

import os
import numpy as np
import random
import unittest
import logging

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger

from test_imperative_qat import TestImperativeQat

paddle.enable_static()

os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})

_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')


class TestImperativeQatfuseBN(TestImperativeQat):
    """Imperative QAT test case that runs with conv + bn fusion enabled."""

    def set_vars(self):
        """Set the quantization options read by the inherited test body."""
        # The option this test case exists to exercise.
        self.fuse_conv_bn = True

        # Remaining options for this case.
        self.onnx_format = False
        self.diff_threshold = 0.01
        self.activation_quantize_type = 'moving_average_abs_max'
        self.weight_quantize_type = 'abs_max'


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def generate_quantized_model(self,
is_optimize_model=False,
batch_size=10,
batch_nums=10,
onnx_format=False):
onnx_format=False,
skip_tensor_list=None):

place = fluid.CPUPlace()
exe = fluid.Executor(place)
Expand All @@ -136,6 +137,7 @@ def generate_quantized_model(self,
is_full_quantize=is_full_quantize,
optimize_model=is_optimize_model,
onnx_format=onnx_format,
skip_tensor_list=skip_tensor_list,
is_use_cache_file=is_use_cache_file)
ptq.quantize()
ptq.save_quantized_model(self.int8_model_path)
Expand All @@ -154,7 +156,8 @@ def run_test(self,
batch_size=10,
infer_iterations=10,
quant_iterations=5,
onnx_format=False):
onnx_format=False,
skip_tensor_list=None):

origin_model_path = self.download_model(data_url, data_md5, model_name)
origin_model_path = os.path.join(origin_model_path, model_name)
Expand All @@ -166,10 +169,10 @@ def run_test(self,

print("Start INT8 post training quantization for {0} on {1} images ...".
format(model_name, quant_iterations * batch_size))
self.generate_quantized_model(origin_model_path, algo, round_type,
quantizable_op_type, is_full_quantize,
is_use_cache_file, is_optimize_model,
batch_size, quant_iterations, onnx_format)
self.generate_quantized_model(
origin_model_path, algo, round_type, quantizable_op_type,
is_full_quantize, is_use_cache_file, is_optimize_model, batch_size,
quant_iterations, onnx_format, skip_tensor_list)

print("Start INT8 inference for {0} on {1} images ...".format(
model_name, infer_iterations * batch_size))
Expand Down Expand Up @@ -426,5 +429,38 @@ def test_post_training_mse_onnx_format_full_quant(self):
onnx_format=onnx_format)


class TestPostTrainingavgForMnistSkipOP(TestPostTrainingQuantization):
    """Post-training quantization of the MNIST model with a skipped tensor."""

    def test_post_training_avg_skip_op(self):
        """Run the 'avg' algorithm with "fc_0.w_0" passed as skip_tensor_list."""
        archive_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
        archive_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
        op_types = ["conv2d", "depthwise_conv2d", "mul"]

        # Positional arguments, each labelled with its role.
        self.run_test(
            "mnist_model",  # model_name
            archive_url,  # data_url
            archive_md5,  # data_md5
            "avg",  # algo
            "round",  # round_type
            op_types,  # quantizable_op_type
            False,  # is_full_quantize
            False,  # is_use_cache_file
            True,  # is_optimize_model
            0.01,  # diff_threshold
            10,  # batch_size
            50,  # infer_iterations
            5,  # quant_iterations
            skip_tensor_list=["fc_0.w_0"])


if __name__ == '__main__':
unittest.main()