Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
Merge pull request #1 from Superjomn/fast
Browse files Browse the repository at this point in the history
merge fast to master
  • Loading branch information
kolinwei committed Apr 27, 2018
2 parents f207856 + a94a042 commit 6e4072f
Show file tree
Hide file tree
Showing 74 changed files with 5,481 additions and 44 deletions.
13 changes: 13 additions & 0 deletions __ocr_recognition/continuous_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
import sys
# The shared `kpi` module lives under the CE (continuous evaluation) root,
# exported via the `ceroot` environment variable; it must be on sys.path
# BEFORE the `from kpi import ...` line below (raises KeyError if unset).
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi

# KPI trackers for the OCR recognition job.
# CostKpi args appear to be (name, tolerance, actived) — 0.2 = allowed
# relative regression; TODO confirm against the kpi module's signature.
train_avg_loss_kpi = CostKpi('train_avg_loss', 0.2, 0)
train_seq_err_kpi = CostKpi('train_seq_err', 0.2, 0)


# The CE harness iterates this list to collect and compare metrics.
tracking_kpis = [
    train_avg_loss_kpi,
    train_seq_err_kpi,
]
221 changes: 221 additions & 0 deletions __ocr_recognition/crnn_ctc_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import paddle.fluid as fluid


def conv_bn_pool(input,
                 group,
                 out_ch,
                 act="relu",
                 param=None,
                 bias=None,
                 param_0=None,
                 is_test=False):
    """Stack ``group`` conv+batch-norm layers, then one 2x2 max-pool.

    Args:
        input: input feature map (4-D NCHW tensor variable).
        group: number of conv+bn layers to stack.
        out_ch: per-layer output channel counts; indexed ``out_ch[i]``,
            so it must have at least ``group`` entries.
        act: activation applied by batch_norm (conv itself is linear).
        param: ParamAttr for the conv weights (also used by batch_norm
            scale via ``param_attr``).
        bias: ParamAttr for the batch_norm bias.
        param_0: optional override ParamAttr for the conv weights;
            NOTE(review): when given it replaces ``param`` for EVERY conv
            in this group, not just the first — confirm that is intended.
        is_test: True to run batch_norm in inference mode.

    Returns:
        The pooled output tensor variable.
    """
    tmp = input
    # range() instead of the Python-2-only xrange(): identical behavior
    # for these small loop counts and keeps the file Python-3 compatible.
    for i in range(group):
        tmp = fluid.layers.conv2d(
            input=tmp,
            num_filters=out_ch[i],
            filter_size=3,
            padding=1,
            param_attr=param if param_0 is None else param_0,
            act=None,  # linear conv; the non-linearity comes from batch_norm
            use_cudnn=True)
        tmp = fluid.layers.batch_norm(
            input=tmp,
            act=act,
            param_attr=param,
            bias_attr=bias,
            is_test=is_test)
    # Single 2x2/stride-2 max-pool after the whole conv stack; ceil_mode
    # keeps partial border windows so odd sizes round up.
    tmp = fluid.layers.pool2d(
        input=tmp,
        pool_size=2,
        pool_type='max',
        pool_stride=2,
        use_cudnn=True,
        ceil_mode=True)

    return tmp


def ocr_convs(input,
              num,
              with_bn,
              regularizer=None,
              gradient_clip=None,
              is_test=False):
    """Build the CNN backbone: four conv_bn_pool groups of two convs each.

    Channel widths double per group (16 -> 32 -> 64 -> 128). The first
    group additionally uses a tighter-variance initializer (std 0.0005)
    for its conv weights via ``param_0``.
    """
    assert (num % 4 == 0)

    # Zero-initialized bias for every batch_norm layer.
    zero_bias = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.0))
    # Narrow Gaussian init for the very first conv group only.
    first_weight = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.0005))
    # Standard Gaussian init for all remaining conv weights.
    weight = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.01))

    out = conv_bn_pool(
        input, 2, [16, 16],
        param=weight,
        bias=zero_bias,
        param_0=first_weight,
        is_test=is_test)
    # Remaining three groups share the standard weight initializer.
    for channels in ([32, 32], [64, 64], [128, 128]):
        out = conv_bn_pool(
            out, 2, channels, param=weight, bias=zero_bias, is_test=is_test)
    return out


def encoder_net(images,
                num_classes,
                rnn_hidden_size=200,
                regularizer=None,
                gradient_clip=None,
                is_test=False):
    """CRNN encoder: CNN backbone -> column slicing -> bi-directional GRU.

    Returns the per-timestep logits (``num_classes + 1`` outputs; the
    extra class is the CTC blank label).
    """
    conv_out = ocr_convs(
        images,
        8,
        True,
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        is_test=is_test)
    # Collapse the feature map into a sequence: each 1-pixel-wide column
    # (full height) becomes one timestep.
    column_seq = fluid.layers.im2sequence(
        input=conv_out,
        stride=[1, 1],
        filter_size=[conv_out.shape[2], 1])

    weight_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.02))
    # GRU bias learns at 2x the base learning rate.
    gru_bias_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.02),
        learning_rate=2.0)
    fc_bias_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.02))

    # Separate input projections for the two GRU directions; size is
    # 3 * hidden because the GRU packs its three gates into one matmul.
    proj_fwd = fluid.layers.fc(input=column_seq,
                               size=rnn_hidden_size * 3,
                               param_attr=weight_attr,
                               bias_attr=fc_bias_attr)
    proj_bwd = fluid.layers.fc(input=column_seq,
                               size=rnn_hidden_size * 3,
                               param_attr=weight_attr,
                               bias_attr=fc_bias_attr)

    gru_fwd = fluid.layers.dynamic_gru(
        input=proj_fwd,
        size=rnn_hidden_size,
        param_attr=weight_attr,
        bias_attr=gru_bias_attr,
        candidate_activation='relu')
    gru_bwd = fluid.layers.dynamic_gru(
        input=proj_bwd,
        size=rnn_hidden_size,
        is_reverse=True,
        param_attr=weight_attr,
        bias_attr=gru_bias_attr,
        candidate_activation='relu')

    out_weight_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.02))
    out_bias_attr = fluid.ParamAttr(
        regularizer=regularizer,
        gradient_clip=gradient_clip,
        initializer=fluid.initializer.Normal(0.0, 0.0))

    # Output projection over both directions; +1 output for the CTC blank.
    logits = fluid.layers.fc(input=[gru_fwd, gru_bwd],
                             size=num_classes + 1,
                             param_attr=out_weight_attr,
                             bias_attr=out_bias_attr)

    return logits


def ctc_train_net(images, label, args, num_classes):
    """Build the CTC training graph.

    Constructs the encoder, warp-CTC loss, greedy decoder, edit-distance
    evaluator, inference program, Momentum optimizer, and model-average
    helper.  Returns ``(sum_cost, error_evaluator, inference_program,
    model_average)``.

    Args (assumed from usage — confirm against callers):
        images: image input variable fed to the encoder.
        label: ground-truth label sequence variable.
        args: config namespace providing l2, parallel, learning_rate,
            momentum, average_window, min_average_window,
            max_average_window.
        num_classes: number of real character classes (the CTC blank is
            index ``num_classes``).
    """
    regularizer = fluid.regularizer.L2Decay(args.l2)
    gradient_clip = None
    if args.parallel:
        # Multi-device training via the (legacy) ParallelDo construct:
        # the body below is replicated per place, with NCCL for gradients.
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=True)
        with pd.do():
            # Split the inputs across devices.
            images_ = pd.read_input(images)
            label_ = pd.read_input(label)

            fc_out = encoder_net(
                images_,
                num_classes,
                regularizer=regularizer,
                gradient_clip=gradient_clip)

            # Blank label is num_classes (one past the real classes).
            cost = fluid.layers.warpctc(
                input=fc_out,
                label=label_,
                blank=num_classes,
                norm_by_times=True)
            sum_cost = fluid.layers.reduce_sum(cost)

            decoded_out = fluid.layers.ctc_greedy_decoder(
                input=fc_out, blank=num_classes)

            # Outputs are gathered in write order: cost first, decode second.
            pd.write_output(sum_cost)
            pd.write_output(decoded_out)

        # pd() returns the per-device outputs; re-reduce the cost across
        # devices to a single scalar.
        sum_cost, decoded_out = pd()
        sum_cost = fluid.layers.reduce_sum(sum_cost)

    else:
        # Single-device path: same graph without the ParallelDo wrapper.
        fc_out = encoder_net(
            images,
            num_classes,
            regularizer=regularizer,
            gradient_clip=gradient_clip)

        cost = fluid.layers.warpctc(
            input=fc_out, label=label, blank=num_classes, norm_by_times=True)
        sum_cost = fluid.layers.reduce_sum(cost)
        decoded_out = fluid.layers.ctc_greedy_decoder(
            input=fc_out, blank=num_classes)

    # EditDistance expects int64 labels; compare greedy decodes against
    # the ground truth for a sequence-error metric.
    casted_label = fluid.layers.cast(x=label, dtype='int64')
    error_evaluator = fluid.evaluator.EditDistance(
        input=decoded_out, label=casted_label)

    # Clone for inference BEFORE adding optimizer ops, so the test
    # program contains only the forward + metric ops.
    inference_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=args.learning_rate, momentum=args.momentum)
    _, params_grads = optimizer.minimize(sum_cost)
    # Maintain a running average of parameters for evaluation snapshots.
    model_average = fluid.optimizer.ModelAverage(
        args.average_window,
        params_grads,
        min_average_window=args.min_average_window,
        max_average_window=args.max_average_window)

    return sum_cost, error_evaluator, inference_program, model_average


def ctc_infer(images, num_classes):
    """Build the inference graph: encoder (test mode) + greedy CTC decode."""
    logits = encoder_net(images, num_classes, is_test=True)
    decoded = fluid.layers.ctc_greedy_decoder(input=logits, blank=num_classes)
    return decoded


def ctc_eval(images, label, num_classes):
    """Build the evaluation graph.

    Returns ``(error_evaluator, cost)``: an edit-distance evaluator over
    greedy CTC decodes and the warp-CTC cost against the raw labels.
    """
    logits = encoder_net(images, num_classes, is_test=True)
    decoded = fluid.layers.ctc_greedy_decoder(
        input=logits, blank=num_classes)

    # EditDistance requires int64 labels.
    label_int64 = fluid.layers.cast(x=label, dtype='int64')
    error_evaluator = fluid.evaluator.EditDistance(
        input=decoded, label=label_int64)

    cost = fluid.layers.warpctc(
        input=logits, label=label, blank=num_classes, norm_by_times=True)

    return error_evaluator, cost
Loading

0 comments on commit 6e4072f

Please sign in to comment.