Skip to content

Commit

Permalink
remove return value in dist.merge() (PaddlePaddle#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
baiyfbupt authored and wanghaoshuang committed Jan 10, 2020
1 parent 74fb067 commit 00f971c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 43 deletions.
22 changes: 9 additions & 13 deletions demo/distillation/distillation_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def compress(args):
# print(v.name, v.shape)

exe.run(t_startup)
_download('http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar', '.')
_download(
'http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar',
'.')
_decompress('./ResNet50_pretrained.tar')
assert args.teacher_pretrained_model and os.path.exists(
args.teacher_pretrained_model
Expand All @@ -168,31 +170,25 @@ def if_exist(var):
predicate=if_exist)

data_name_map = {'image': 'image'}
main = merge(
teacher_program,
student_program,
data_name_map,
place)

with fluid.program_guard(main, s_startup):
l2_loss = l2_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0", main)
merge(teacher_program, student_program, data_name_map, place)

with fluid.program_guard(student_program, s_startup):
l2_loss = l2_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0", student_program)
loss = avg_cost + l2_loss
opt = create_optimizer(args)
opt.minimize(loss)
exe.run(s_startup)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
parallel_main = fluid.CompiledProgram(main).with_data_parallel(
parallel_main = fluid.CompiledProgram(student_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)

for epoch_id in range(args.num_epochs):
for step_id, data in enumerate(train_loader):
loss_1, loss_2, loss_3 = exe.run(
parallel_main,
feed=data,
fetch_list=[
loss.name, avg_cost.name, l2_loss.name
])
fetch_list=[loss.name, avg_cost.name, l2_loss.name])
if step_id % args.log_period == 0:
_logger.info(
"train_epoch {} step {} loss {:.6f}, class loss {:.6f}, l2 loss {:.6f}".
Expand Down
46 changes: 23 additions & 23 deletions docs/docs/api/single_distiller_api.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## merge
paddleslim.dist.merge(teacher_program, student_program, data_name_map, place, scope=fluid.global_scope(), name_prefix='teacher_') [[源代码]](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/dist/single_distiller.py#L19)
paddleslim.dist.merge(teacher_program, student_program, data_name_map, place, scope=fluid.global_scope(), name_prefix='teacher_') [[源代码]](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/dist/single_distiller.py#L19)

: merge将两个paddle program(teacher_program, student_program)融合为一个program,并将融合得到的program返回。在融合的program中,可以为其中合适的teacher特征图和student特征图添加蒸馏损失函数,从而达到用teacher模型的暗知识(Dark Knowledge)指导student模型学习的目的。
: merge将teacher_program融合到student_program中。在融合的program中,可以为其中合适的teacher特征图和student特征图添加蒸馏损失函数,从而达到用teacher模型的暗知识(Dark Knowledge)指导student模型学习的目的。

**参数:**

Expand All @@ -12,7 +12,7 @@ paddleslim.dist.merge(teacher_program, student_program, data_name_map, place, sc
- **scope**(Scope)-该参数表示程序使用的变量作用域,如果不指定将使用默认的全局作用域。默认值:[*fluid.global_scope()*](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/global_scope_cn.html#global-scope)
- **name_prefix**(str)-merge操作将统一为teacher的[*Variables*](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.3/api_guides/low_level/program.html#variable)添加的名称前缀name_prefix。默认值:'teacher_'

**返回:** 由student_program和teacher_program merge得到的program
**返回:** 无(merge直接在student_program上进行修改,不再返回新的program)

!!! note "Note"
*data_name_map***teacher_var name到student_var name的映射**,如果写反可能无法正确进行merge
Expand All @@ -37,8 +37,8 @@ with fluid.program_guard(teacher_program):
data_name_map = {'y':'x'}
USE_GPU = False
place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
main_program = dist.merge(teacher_program, student_program,
data_name_map, place)
dist.merge(teacher_program, student_program,
data_name_map, place)
```


Expand Down Expand Up @@ -76,10 +76,10 @@ with fluid.program_guard(teacher_program):
data_name_map = {'y':'x'}
USE_GPU = False
place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
main_program = merge(teacher_program, student_program, data_name_map, place)
with fluid.program_guard(main_program):
merge(teacher_program, student_program, data_name_map, place)
with fluid.program_guard(student_program):
distillation_loss = dist.fsp_loss('teacher_t1.tmp_1', 'teacher_t2.tmp_1',
's1.tmp_1', 's2.tmp_1', main_program)
's1.tmp_1', 's2.tmp_1', student_program)
```


Expand All @@ -91,7 +91,7 @@ paddleslim.dist.l2_loss(teacher_var_name, student_var_name, program=fluid.defaul

**参数:**

- **teacher_var_name**(str): teacher_var的名称.
- **teacher_var_name**(str): teacher_var的名称.
- **student_var_name**(str): student_var的名称.
- **program**(Program): 用于蒸馏训练的fluid program。默认值:[*fluid.default_main_program()*](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.3/api_cn/fluid_cn.html#default-main-program)

Expand All @@ -116,10 +116,10 @@ with fluid.program_guard(teacher_program):
data_name_map = {'y':'x'}
USE_GPU = False
place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
main_program = merge(teacher_program, student_program, data_name_map, place)
with fluid.program_guard(main_program):
merge(teacher_program, student_program, data_name_map, place)
with fluid.program_guard(student_program):
distillation_loss = dist.l2_loss('teacher_t2.tmp_1', 's2.tmp_1',
main_program)
student_program)
```


Expand All @@ -131,11 +131,11 @@ paddleslim.dist.soft_label_loss(teacher_var_name, student_var_name, program=flui

**参数:**

- **teacher_var_name**(str): teacher_var的名称.
- **student_var_name**(str): student_var的名称.
- **teacher_var_name**(str): teacher_var的名称.
- **student_var_name**(str): student_var的名称.
- **program**(Program): 用于蒸馏训练的fluid program。默认值:[*fluid.default_main_program()*](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.3/api_cn/fluid_cn.html#default-main-program)
- **teacher_temperature**(float): 对teacher_var进行soft操作的温度值,温度值越大得到的特征图越平滑
- **student_temperature**(float): 对student_var进行soft操作的温度值,温度值越大得到的特征图越平滑
- **teacher_temperature**(float): 对teacher_var进行soft操作的温度值,温度值越大得到的特征图越平滑
- **student_temperature**(float): 对student_var进行soft操作的温度值,温度值越大得到的特征图越平滑

**返回:** 由teacher_var, student_var组合得到的soft_label_loss

Expand All @@ -158,10 +158,10 @@ with fluid.program_guard(teacher_program):
data_name_map = {'y':'x'}
USE_GPU = False
place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
main_program = merge(teacher_program, student_program, data_name_map, place)
with fluid.program_guard(main_program):
merge(teacher_program, student_program, data_name_map, place)
with fluid.program_guard(student_program):
distillation_loss = dist.soft_label_loss('teacher_t2.tmp_1',
's2.tmp_1', main_program, 1., 1.)
's2.tmp_1', student_program, 1., 1.)
```


Expand All @@ -173,7 +173,7 @@ paddleslim.dist.loss(loss_func, program=fluid.default_main_program(), **kwargs)

**参数:**

- **loss_func**(python function): 自定义的损失函数,输入为teacher var和student var,输出为自定义的loss
- **loss_func**(python function): 自定义的损失函数,输入为teacher var和student var,输出为自定义的loss
- **program**(Program): 用于蒸馏训练的fluid program。默认值:[*fluid.default_main_program()*](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.3/api_cn/fluid_cn.html#default-main-program)
- **\**kwargs**: loss_func输入名与对应variable名称

Expand All @@ -198,15 +198,15 @@ with fluid.program_guard(teacher_program):
data_name_map = {'y':'x'}
USE_GPU = False
place = fluid.CUDAPlace(0) if USE_GPU else fluid.CPUPlace()
main_program = merge(teacher_program, student_program, data_name_map, place)
merge(teacher_program, student_program, data_name_map, place)
def adaptation_loss(t_var, s_var):
teacher_channel = t_var.shape[1]
s_hint = fluid.layers.conv2d(s_var, teacher_channel, 1)
hint_loss = fluid.layers.reduce_mean(fluid.layers.square(s_hint - t_var))
return hint_loss
with fluid.program_guard(main_program):
with fluid.program_guard(student_program):
distillation_loss = dist.loss(adaptation_loss, student_program,
t_var='teacher_t2.tmp_1', s_var='s2.tmp_1')
t_var='teacher_t2.tmp_1', s_var='s2.tmp_1')
```

!!! note "注意事项"
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/tutorials/distillation_demo.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ merge过程操作较多,具体细节请参考[merge API文档](https://paddlep

```python
data_name_map = {'data': 'image'}
student_program = merge(teacher_program, student_program, data_name_map, place)
merge(teacher_program, student_program, data_name_map, place)
```

### 5.添加蒸馏loss
Expand Down
14 changes: 8 additions & 6 deletions paddleslim/dist/single_distiller.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def merge(teacher_program,
paddle run on which device.
scope(Scope): The input scope
name_prefix(str): Name prefix added for all vars of the teacher program.
Return(Program): Merged program.
"""
teacher_program = teacher_program.clone(for_test=True)
for teacher_var in teacher_program.list_vars():
Expand All @@ -51,7 +50,7 @@ def merge(teacher_program,
old_var = scope.var(teacher_var.name).get_tensor()
renamed_var = scope.var(new_name).get_tensor()
renamed_var.set(np.array(old_var), place)

# program var rename
renamed_var = teacher_program.global_block()._rename_var(
teacher_var.name, new_name)
Expand Down Expand Up @@ -84,11 +83,13 @@ def merge(teacher_program,
attrs[attr_name] = op.attr(attr_name)
student_program.global_block().append_op(
type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)
return student_program


def fsp_loss(teacher_var1_name, teacher_var2_name, student_var1_name,
student_var2_name, program=fluid.default_main_program()):
def fsp_loss(teacher_var1_name,
teacher_var2_name,
student_var1_name,
student_var2_name,
program=fluid.default_main_program()):
"""
Combine variables from student model and teacher model by fsp-loss.
Args:
Expand All @@ -115,7 +116,8 @@ def fsp_loss(teacher_var1_name, teacher_var2_name, student_var1_name,
return fsp_loss


def l2_loss(teacher_var_name, student_var_name,
def l2_loss(teacher_var_name,
student_var_name,
program=fluid.default_main_program()):
"""
Combine variables from student model and teacher model by l2-loss.
Expand Down

0 comments on commit 00f971c

Please sign in to comment.