[dygraph qat] Refine saving output scale to infer program #31784

Merged (9 commits, Mar 24, 2021)
Changes from 7 commits
229 changes: 123 additions & 106 deletions python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -251,8 +251,8 @@ def __init__(self,
super(ImperativeQuantizeInputs, self).__init__()

self._quantizable_layer_type = tuple(
utils.supported_quant_layers_map[layer]
if layer in utils.supported_quant_layers_map else layer
utils.quant_input_layers_map[layer]
if layer in utils.quant_input_layers_map else layer
for layer in quantizable_layer_type)
for layer in self._quantizable_layer_type:
assert not isinstance(layer, str), \
@@ -324,12 +324,11 @@ def apply(self, model):
target = name[last_idx:idx]

quant_layer = self._get_quantized_layer(layer)
setattr(quant_layer, "layer_name", layer.full_name())
setattr(obj, target, quant_layer)

def _get_quantized_layer(self, layer):
quant_layer_name = None
for key, value in utils.supported_quant_layers_map.items():
for key, value in utils.quant_input_layers_map.items():
if isinstance(layer, value):
quant_layer_name = 'Quantized' + key
break
@@ -372,6 +371,9 @@ def apply(self, model):
"""
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

# Calculate the target ops' output scales, and don't consider
# the skip_quant attr
for _, layer in model.named_sublayers():
if self._is_target_layer(layer):
self._init_scale_params(layer)
@@ -411,24 +413,21 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
assert isinstance(layer, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

# remove handles and collect output scales
self._gather_output_scale(layer)

with dygraph.guard():
layer.eval()
for handle in self._register_hook_handle_list:
handle.remove()
for _, sub_layer in layer.named_sublayers():
if self._is_target_layer(sub_layer):
if hasattr(sub_layer, "layer_name"):
layer_name = sub_layer.layer_name
else:
layer_name = sub_layer.full_name()
if hasattr(sub_layer, "_quant_out_scale"):
self._out_scale_dict[layer_name] = float(
sub_layer._quant_out_scale)

# save the quantized model that doesn't have output scales
paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)

if len(self._out_scale_dict) == 0:
warnings.warn("Warning: No Layer of the model while to be " \
"saved contains the out_threshold attribute, so the " \
"generated inference model would not contain the " \
"out_threshold.")
return

# load static model
is_dynamic_mode = False
if paddle.in_dynamic_mode():
@@ -443,96 +442,103 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
basename = os.path.basename(path)
model_filename = basename + INFER_MODEL_SUFFIX
params_filename = basename + INFER_PARAMS_SUFFIX
[inference_program, feed_target_names, fetch_targets] = (

[infer_program, feed_target_names, fetch_targets] = (
load_inference_model(
dirname=dirname,
executor=exe,
model_filename=model_filename,
params_filename=params_filename))

# TODO(jc): analyse whether the dygraph model has
# several blocks before applying qat
assert infer_program.num_blocks == 1, \
"Quantization aware training (QAT) requires the program " \
"only has a block for now. When the model has if-else or " \
"while, the program will have several blocks."

# set output scales to the static model
check_behind_op = False
op_count = 0
ops_list = [key for key, _ in self._out_scale_dict.items()]
if len(ops_list) == 0:
warnings.warn(
"Warning: No Layer of the model while to be saved contains "
"the out_threshold attribute, so the generated inference "
"model would not contain the out_threshold.")
else:
# Because the Layer in dygraph may correspond to multiple ops
# in static program after being saved. To ensure correctness,
# the outscale collected for output of dygraph Layer can only
# be set to the last op in the corresponding ops in static program.
#
# We can judge the execution order of the ops which corresponding
# to dygraph Layer by check_behind_op
forward_op = None
for block in inference_program.blocks:
for op in block.ops:
if op.type in utils.op_real_in_out_name:
if op_count > len(ops_list):
warnings.warn(
"The number of Layer which has "
"out_threshold attribute should be bigger than "
"the op in inference model")
break
if check_behind_op:
check_behind_op = False
if op.type == "elementwise_add":
if self._is_op_matched(ops_list[op_count], op,
block):
op._set_attr("out_threshold",
self._out_scale_dict[ops_list[
op_count]])
op_count += 1
forward_op = None
continue
else:
if forward_op is None:
raise ValueError(
"forward_op should not be None")
if self._is_op_matched(ops_list[op_count],
forward_op, block):
forward_op._set_attr(
"out_threshold", self._out_scale_dict[
ops_list[op_count]])
op_count += 1
forward_op = None

if op.type in ["conv2d", "depthwise_conv2d", "matmul"]:
check_behind_op = True
forward_op = op
continue
if op_count >= len(ops_list):
warnings.warn(
"The number of Layer which has out_threshold attribute should be bigger than the op in inference model"
)
break
if self._is_op_matched(ops_list[op_count], op, block):
op._set_attr(
"out_threshold",
self._out_scale_dict[ops_list[op_count]])
op_count += 1

self._set_skip_quant_attr(inference_program)
self._save_output_scale(infer_program)

# process skip quant
self._set_skip_quant_attr(infer_program)

# save the final quantized model that has output scales
save_inference_model(
dirname=dirname,
feeded_var_names=feed_target_names,
target_vars=fetch_targets,
executor=exe,
main_program=inference_program.clone(),
main_program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename)

if is_dynamic_mode:
paddle.disable_static()

def _gather_output_scale(self, layer):
"""
Gather all output scales to self._out_scale_dict
"""
with dygraph.guard():
layer.eval()
for _, sub_layer in layer.named_sublayers():
if self._is_target_layer(sub_layer):
layer_name = sub_layer.full_name()
if hasattr(sub_layer, "_quant_out_scale"):
self._out_scale_dict[layer_name] = float(
sub_layer._quant_out_scale)

def _save_output_scale(self, infer_program):
"""
Save all output scales to the corresponding ops in static
inference program.

A Layer in dygraph may correspond to multiple ops in the static
program after being saved. To ensure correctness, the output scale
collected for a dygraph Layer can only be set on the last of its
corresponding ops in the static program.
"""
assert infer_program.num_blocks == 1, \
"The inference program should only have a block."

global_block = infer_program.global_block()
target_ops = global_block.ops

scale_idx = 0
op_idx = 0
attr_name = "out_threshold"

for scale_name, scale_value in self._out_scale_dict.items():
while True:
if op_idx >= len(target_ops):
break

op = target_ops[op_idx]

if not self._is_scale_op_matched(scale_name, op, global_block):
op_idx += 1
else:
if op.type in utils.weight_op_types \
and op_idx + 1 < len(target_ops) \
and target_ops[op_idx+1].type == "elementwise_add":
target_ops[op_idx + 1]._set_attr(attr_name, scale_value)
op_idx += 2
else:
op._set_attr(attr_name, scale_value)
op_idx += 1
scale_idx += 1
break

if scale_idx != len(self._out_scale_dict):
_logger.warning("Warning: the model have %s output scales, "\
"but it only saves %s output scales." \
% (len(self._out_scale_dict), scale_idx))

def _is_target_layer(self, layer):
return isinstance(layer, utils.out_scale_layers_list) \
or 'quantized_' in layer.full_name()
return isinstance(layer, tuple(utils.quant_output_layers_map.values())) \
or ('quantized_' in layer.full_name() and \
'quantized_noweight' not in layer.full_name())

def _init_scale_params(self, layer, name=None):
"""
@@ -570,27 +576,38 @@ def _create_param(in_layer, first_name, last_name, dtype):
layer._quant_out_accum = _create_param(layer, name, "accum", dtype)
layer._quant_out_accum.stop_gradient = True

# Judge whether the op in program matches the Layer in dynamic model
def _is_op_matched(self, layer_name, op, block):
output_var_names = quantization_pass._get_op_output_var_names(op)
for output_var_name in output_var_names:
output_var_tensor = block.var(output_var_name)
if output_var_tensor.dtype not in [
core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32
]:
return False

# Because the naming styles of static and dynamic graph are different,
# in order to avoid mistakes, we unify the name here.
op_type = output_var_names[0].split(".")[0]
op_type = op_type.rsplit("_", 1)[0]
if op_type == 'depthwise_conv2d':
op_type = 'conv2d'
if 'prelu' in op_type:
op_type = op_type.replace('prelu', 'p_re_lu')
if 'relu' in op_type:
op_type = op_type.replace('relu', 're_lu')
return op_type in layer_name
def _is_scale_op_matched(self, scale_name, op, block):
"""
Judge whether an op in the program matches the scale_name, based
on the op name and attrs. The name correspondence between the
dygraph and static models must be known.
"""
fp_type = [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]
if op.type in quantization_pass._op_real_in_out_name.keys():
output_var_names = quantization_pass._get_op_output_var_names(op)
for output_var_name in output_var_names:
output_var_tensor = block.var(output_var_name)
if output_var_tensor.dtype not in fp_type:
return False

# Note that the items in corresponding_dict are checked in priority order
corresponding_dict = {
'conv2d_transpose': [['conv2d_transpose', \
'depthwise_conv2d_transpose'], None],
'conv2d': [['conv2d', 'depthwise_conv2d'], None],
'linear': [['matmul'], None],
're_lu6': [['relu6'], None],
'p_re_lu': [['prelu'], None],
'leaky_re_lu': [['leaky_relu'], None],
're_lu': [['relu'], None],
}

for key, value in corresponding_dict.items():
if key in scale_name:
return (op.type in value[0]) and \
(len(value) == 1 or value[1] is None or value[1](op))
Contributor:
Why is the check `value[1] is None or value[1](op)` added here?

Contributor (author):
If necessary, a lambda function can be added to check whether a static-graph op satisfies a particular condition. Currently only the op name is used here, so the predicate defaults to None.
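
For illustration only, such a predicate entry might look like the following self-contained sketch. `FakeOp`, the `groups == 1` condition, and the example scale names are hypothetical stand-ins, not part of this PR:

```python
# Hypothetical illustration of a predicate in corresponding_dict:
# the second element may be a lambda that inspects the static-graph op.
# FakeOp is a stand-in so the sketch runs without Paddle.

class FakeOp:
    def __init__(self, op_type, attrs=None):
        self.type = op_type
        self._attrs = attrs or {}

    def attr(self, name):
        return self._attrs.get(name)

corresponding_dict = {
    # only plain (non-grouped) convs would match this entry
    'conv2d': [['conv2d', 'depthwise_conv2d'],
               lambda op: op.attr("groups") == 1],
    'linear': [['matmul'], None],
}

def scale_op_matched(scale_name, op):
    for key, (op_types, predicate) in corresponding_dict.items():
        if key in scale_name:
            return op.type in op_types and (predicate is None or predicate(op))
    return op.type in scale_name

print(scale_op_matched("conv2d_0", FakeOp("conv2d", {"groups": 1})))            # True
print(scale_op_matched("conv2d_0", FakeOp("depthwise_conv2d", {"groups": 8})))  # False
```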


return op.type in scale_name

def _set_skip_quant_attr(self, program):
block = program.global_block()
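The placement rule implemented in `_save_output_scale` above, where a weight op (e.g. `conv2d`) immediately followed by an `elementwise_add` gets the scale written on the `elementwise_add`, can be illustrated with a small self-contained sketch. `SimpleOp`, the toy op list, and the toy scale names are hypothetical stand-ins, not Paddle code:

```python
# Self-contained illustration of the out_threshold placement rule.
# SimpleOp and the op/scale names below are hypothetical stand-ins.

weight_op_types = ["conv2d", "depthwise_conv2d", "matmul",
                   "conv2d_transpose", "depthwise_conv2d_transpose"]

class SimpleOp:
    """Minimal stand-in for a static-graph operator."""
    def __init__(self, op_type):
        self.type = op_type
        self.attrs = {}

    def _set_attr(self, name, value):
        self.attrs[name] = value

# A Conv2D layer saved from dygraph typically becomes conv2d + elementwise_add.
ops = [SimpleOp("conv2d"), SimpleOp("elementwise_add"), SimpleOp("relu")]
out_scale_dict = {"conv2d_0": 0.5, "relu_0": 1.0}   # toy scale names

op_idx = 0
attr_name = "out_threshold"
for scale_name, scale_value in out_scale_dict.items():
    while op_idx < len(ops):
        op = ops[op_idx]
        if op.type not in scale_name:        # not the matching op yet
            op_idx += 1
            continue
        if op.type in weight_op_types \
                and op_idx + 1 < len(ops) \
                and ops[op_idx + 1].type == "elementwise_add":
            # the layer expanded into weight op + bias add, so the scale
            # is written on the last op, the elementwise_add
            ops[op_idx + 1]._set_attr(attr_name, scale_value)
            op_idx += 2
        else:
            op._set_attr(attr_name, scale_value)
            op_idx += 1
        break

for op in ops:
    print(op.type, op.attrs)
# conv2d {}
# elementwise_add {'out_threshold': 0.5}
# relu {'out_threshold': 1.0}
```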
34 changes: 28 additions & 6 deletions python/paddle/fluid/contrib/slim/quantization/imperative/utils.py
@@ -30,7 +30,7 @@
"swish": [["X"], ["Out"]],
}

supported_quant_layers_map = {
quant_input_layers_map = {
'Conv2D': paddle.nn.Conv2D,
'Linear': paddle.nn.Linear,
'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
@@ -58,8 +58,30 @@
"fake_quantize_dequantize_moving_average_abs_max"
]

out_scale_layers_list = (
paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.MaxPool2D,
paddle.nn.BatchNorm, paddle.nn.BatchNorm2D, paddle.nn.SyncBatchNorm,
paddle.nn.LeakyReLU, paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6,
paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Tanh, paddle.nn.Swish)
quant_output_layers_map = {
'Conv2D': paddle.nn.Conv2D,
'Conv2DTranspose': paddle.nn.Conv2DTranspose,
'Linear': paddle.nn.Linear,
'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
'AdaptiveMaxPool2D': paddle.nn.AdaptiveMaxPool2D,
'AvgPool2D': paddle.nn.AvgPool2D,
'MaxPool2D': paddle.nn.MaxPool2D,
'BatchNorm': paddle.nn.BatchNorm,
'BatchNorm2D': paddle.nn.BatchNorm2D,
'SyncBatchNorm': paddle.nn.SyncBatchNorm,
'ELU': paddle.nn.ELU,
'GELU': paddle.nn.GELU,
'LeakyReLU': paddle.nn.LeakyReLU,
'PReLU': paddle.nn.PReLU,
'ReLU': paddle.nn.ReLU,
'ReLU6': paddle.nn.ReLU6,
'Sigmoid': paddle.nn.Sigmoid,
'Softmax': paddle.nn.Softmax,
'Tanh': paddle.nn.Tanh,
'Swish': paddle.nn.Swish,
}

weight_op_types = [
"conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose",
"depthwise_conv2d_transpose"
]
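
As a quick sanity check after `save_quantized_model`, the saved inference program can be reloaded and scanned for the `out_threshold` attribute that this PR writes. Below is a hedged sketch using only the fluid APIs already visible in the diff; the directory and file names are placeholders, and the `.pdmodel`/`.pdiparams` suffixes are an assumption about INFER_MODEL_SUFFIX and INFER_PARAMS_SUFFIX:

```python
# Hedged verification sketch: reload the saved inference program and list
# every op that carries the out_threshold attribute. Paths and file names
# below are placeholders, not values taken from the PR.
import paddle
import paddle.fluid as fluid

paddle.enable_static()
exe = fluid.Executor(fluid.CPUPlace())

[infer_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
    dirname="./quant_infer_model",          # placeholder directory
    executor=exe,
    model_filename="model.pdmodel",         # assumed INFER_MODEL_SUFFIX
    params_filename="model.pdiparams")      # assumed INFER_PARAMS_SUFFIX

for op in infer_program.global_block().ops:
    if op.has_attr("out_threshold"):
        print(op.type, op.attr("out_threshold"))
```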